Pol  Revision:f37d500
compiler.cpp
Go to the documentation of this file.
1 
20 #include "compiler.h"
21 
22 #include <cstddef>
23 #include <cstdio>
24 #include <cstring>
25 #include <exception>
26 #include <ostream>
27 #include <stdexcept>
28 #include <stdlib.h>
29 
30 #include "../clib/clib.h"
31 #include "../clib/filecont.h"
32 #include "../clib/fileutil.h"
33 #include "../clib/logfacility.h"
34 #include "../clib/passert.h"
35 #include "../clib/stlutil.h"
36 #include "../clib/strutil.h"
37 #include "../plib/pkg.h"
38 #include "compctx.h"
39 #include "compilercfg.h"
40 #include "eprog.h"
41 #include "fmodule.h"
42 #include "modules.h"
43 #include "objmembers.h"
44 #include "symcont.h"
45 #include "token.h"
46 #include "tokens.h"
47 #include "userfunc.h"
48 #include <format/format.h>
49 
50 namespace Pol
51 {
52 namespace Bscript
53 {
56 
57 extern int include_debug;
58 
/// Returns the directory part of `fname`, trailing separator included.
/// Both '/' and '\\' are accepted as separators. When `fname` contains
/// neither, the current directory ("./") is returned.
std::string getpathof( const std::string& fname )
{
  const std::string::size_type last_sep = fname.find_last_of( "\\/" );
  return ( last_sep != std::string::npos ) ? fname.substr( 0, last_sep + 1 )
                                           : std::string( "./" );
}
67 
68 
69 bool Scope::varexists( const std::string& varname, unsigned& idx ) const
70 {
71  for ( int i = static_cast<int>( variables_.size() - 1 ); i >= 0; --i )
72  {
73  if ( Clib::stringicmp( varname, variables_[i].name ) == 0 )
74  {
76  {
77  INFO_PRINT << "Warning: variable '" << variables_[i].name
78  << "' declared as unused but used.\n";
80  throw std::runtime_error( "Warnings treated as errors." );
81  }
82  variables_[i].used = true;
83  idx = i;
84  return true;
85  }
86  }
87  return false;
88 }
89 
90 bool Scope::varexists( const std::string& varname ) const
91 {
92  for ( int i = static_cast<int>( variables_.size() - 1 ); i >= 0; --i )
93  {
94  if ( Clib::stringicmp( varname, variables_[i].name ) == 0 )
95  {
96  return true;
97  }
98  }
99  return false;
100 }
101 
103 {
104  blockdescs_.push_back( BlockDesc() );
105  return blockdescs_.back();
106 }
107 
108 void Scope::popblock( bool varsOnly = false )
109 {
110  BlockDesc& bd = blockdescs_.back();
111 
112  for ( ; bd.varcount; bd.varcount-- ) // To enable popping variables only
113  {
114  Variable& bk = variables_.back();
116  {
117  INFO_PRINT << "Warning: local variable '" << bk.name << "' not used.\n";
119  throw std::runtime_error( "Warnings treated as errors." );
120  else
121  INFO_PRINT << bk.ctx;
122  }
123  variables_.pop_back();
124  }
125 
126  if ( !varsOnly )
127  blockdescs_.pop_back();
128 }
129 
130 void Scope::addvar( const std::string& varname, const CompilerContext& ctx, bool warn_on_notused,
131  bool unused )
132 {
133  for ( size_t i = variables_.size() - blockdescs_.back().varcount; i < variables_.size(); ++i )
134  {
135  if ( Clib::stringicmp( varname, variables_[i].name ) == 0 )
136  {
137  throw std::runtime_error( "Variable " + varname + " is already in scope." );
138  }
139  }
140  Variable newvar;
141  newvar.name = varname;
142  newvar.ctx = ctx;
143  newvar.used = !warn_on_notused;
144  newvar.unused = unused;
145  variables_.push_back( newvar );
146  blockdescs_.back().varcount++;
147 }
148 
150 {
151  blockdescs_.back().valcount++;
152 }
153 
155 {
156  while ( !tokens.empty() )
157  {
158  Token* tkn = tokens.back();
159  tokens.pop_back();
160  delete tkn;
161  }
162 
163 
164  while ( !TX.empty() )
165  {
166  Token* tkn = TX.top();
167  TX.pop();
168  delete tkn;
169  }
170 
171 
172  while ( !CA.empty() )
173  {
174  Token* tkn = CA.front();
175  CA.pop();
176  delete tkn;
177  }
178 }
179 
181 {
182  while ( !expr.CA.empty() )
183  {
184  CA.push( expr.CA.front() );
185  expr.CA.pop();
186  }
187 }
188 
190 {
191  while ( !expr.tokens.empty() )
192  {
193  CA.push( expr.tokens.front() );
194  expr.tokens.erase( expr.tokens.begin() );
195  }
196 }
197 
199 {
200  int lval = 0;
201  switch ( oper->id )
202  {
203  case TOK_ADD:
204  lval = left->lval + right->lval;
205  break;
206  case TOK_SUBTRACT:
207  lval = left->lval - right->lval;
208  break;
209  case TOK_MULT:
210  lval = left->lval * right->lval;
211  break;
212  case TOK_DIV:
213  if ( right->lval == 0 )
214  throw std::runtime_error( "Program would divide by zero" );
215  lval = left->lval / right->lval;
216  break;
217 
218  case TOK_EQUAL:
219  lval = ( left->lval == right->lval );
220  break;
221  case TOK_NEQ:
222  lval = ( left->lval != right->lval );
223  break;
224  case TOK_LESSTHAN:
225  lval = ( left->lval < right->lval );
226  break;
227  case TOK_LESSEQ:
228  lval = ( left->lval <= right->lval );
229  break;
230  case TOK_GRTHAN:
231  lval = ( left->lval > right->lval );
232  break;
233  case TOK_GREQ:
234  lval = ( left->lval >= right->lval );
235  break;
236 
237  case TOK_AND:
238  lval = ( left->lval && right->lval );
239  break;
240  case TOK_OR:
241  lval = ( left->lval || right->lval );
242  break;
243 
244  case TOK_BSRIGHT:
245  lval = ( left->lval >> right->lval );
246  break;
247  case TOK_BSLEFT:
248  lval = ( left->lval << right->lval );
249  break;
250  case TOK_BITAND:
251  lval = ( left->lval & right->lval );
252  break;
253  case TOK_BITOR:
254  lval = ( left->lval | right->lval );
255  break;
256  case TOK_BITXOR:
257  lval = ( left->lval ^ right->lval );
258  break;
259 
260  default:
261  return NULL;
262 
263  break;
264  }
265 
266  auto ntoken = new Token( *left );
267  ntoken->lval = lval;
268  return ntoken;
269 }
270 
272 {
273  double dval = 0.0;
274 
275  switch ( oper->id )
276  {
277  case TOK_ADD:
278  dval = left->dval + right->dval;
279  break;
280  case TOK_SUBTRACT:
281  dval = left->dval - right->dval;
282  break;
283  case TOK_MULT:
284  dval = left->dval * right->dval;
285  break;
286  case TOK_DIV:
287  if ( right->dval == 0.0 )
288  throw std::runtime_error( "Program would divide by zero" );
289  dval = left->dval / right->dval;
290  break;
291 
292  default:
293  break;
294  }
295 
296  auto ntoken = new Token( *left );
297  ntoken->dval = dval;
298  return ntoken;
299 }
300 
301 
303 {
304  Token* ntoken = NULL;
305  switch ( oper->id )
306  {
307  case TOK_ADD:
308  {
309  ntoken = new Token( *left );
310  std::string combined;
311  combined = std::string( left->tokval() ) + std::string( right->tokval() );
312  ntoken->copyStr( combined.c_str() );
313  }
314  break;
315 
316  default:
317  break;
318  }
319  return ntoken;
320 }
321 
323 {
324  Token* ntoken = NULL;
325  switch ( oper->id )
326  {
327  case TOK_UNMINUS:
328  ntoken = new Token( *value );
329  ntoken->lval = -value->lval;
330  break;
331  case TOK_LOG_NOT:
332  ntoken = new Token( *value );
333  ntoken->lval = !value->lval;
334  break;
335  case TOK_BITWISE_NOT:
336  ntoken = new Token( *value );
337  ntoken->lval = ~ntoken->lval;
338  break;
339 
340  default:
341  break;
342  }
343  return ntoken;
344 }
346 {
347  Token* ntoken = NULL;
348  switch ( oper->id )
349  {
350  case TOK_UNMINUS:
351  ntoken = new Token( *value );
352  ntoken->dval = -value->dval;
353  break;
354 
355  default:
356  break;
357  }
358  return ntoken;
359 }
360 Token* optimize_string_operation( Token* /*oper*/, Token* /*value*/ )
361 {
362  return NULL;
363 }
364 
366 {
367  for ( unsigned i = 0; i < tokens.size(); i++ )
368  {
369  Token* oper = tokens[i];
370  if ( oper->type != TYP_OPERATOR )
371  {
372  continue;
373  }
374  if ( i < 2 )
375  throw std::runtime_error( "Unbalanced binary operator: " + Clib::tostring( *oper ) );
376 
377  Token* left = tokens[i - 2];
378  Token* right = tokens[i - 1];
379  if ( left->id != right->id )
380  {
381  // only optimize operations on like operands
382  continue;
383  }
384  if ( left->id != TOK_LONG && left->id != TOK_STRING && left->id != TOK_DOUBLE )
385  {
386  continue;
387  }
388 
389  Token* ntoken = NULL;
390  switch ( left->id )
391  {
392  case TOK_LONG:
393  ntoken = optimize_long_operation( left, oper, right );
394  break;
395  case TOK_DOUBLE:
396  ntoken = optimize_double_operation( left, oper, right );
397  break;
398  case TOK_STRING:
399  ntoken = optimize_string_operation( left, oper, right );
400  break;
401 
402 
403  default:
404  break;
405  }
406  if ( ntoken )
407  {
408  delete left;
409  delete right;
410  delete oper;
411  tokens[i - 2] = ntoken;
412  tokens.erase( tokens.begin() + ( i - 1 ), tokens.begin() + ( i + 1 ) );
413  }
414  }
415 }
416 
418 {
419  for ( unsigned i = 0; i < tokens.size(); i++ )
420  {
421  Token* oper = tokens[i];
422  if ( oper->type != TYP_UNARY_OPERATOR )
423  {
424  continue;
425  }
426  if ( i < 1 )
427  throw std::runtime_error( "Unbalanced unary operator: " + Clib::tostring( *oper ) );
428 
429  Token* value = tokens[i - 1];
430 
431  if ( oper->id == TOK_UNPLUS )
432  {
433  // unary plus does nothing.
434  delete oper;
435  tokens.erase( tokens.begin() + i, tokens.begin() + i + 1 );
436  continue;
437  }
438  if ( value->id != TOK_LONG && value->id != TOK_STRING && value->id != TOK_DOUBLE )
439  {
440  continue;
441  }
442  Token* ntoken = NULL;
443  switch ( value->id )
444  {
445  case TOK_LONG:
446  ntoken = optimize_long_operation( oper, value );
447  break;
448  case TOK_DOUBLE:
449  ntoken = optimize_double_operation( oper, value );
450  break;
451  case TOK_STRING:
452  ntoken = optimize_string_operation( oper, value );
453  break;
454  default:
455  break;
456  }
457  if ( ntoken )
458  {
459  delete value;
460  delete oper;
461  tokens[i - 1] = ntoken;
462  tokens.erase( tokens.begin() + i, tokens.begin() + ( i + 1 ) );
463  }
464  }
465 }
466 
467 int Expression::get_num_tokens( int idx ) const
468 {
469  Token* tkn = tokens[idx];
470  int children = 0;
471 
472  if ( tkn->type == TYP_OPERAND ) // constant
473  {
474  // "anonymous" struct definitions inside array/dict/...
475  if ( tkn->id == INS_ADDMEMBER_ASSIGN ) // struct{a:=1}
476  children = 2;
477  else if ( tkn->id == INS_ADDMEMBER2 ) // struct{a}
478  children = 1;
479  else
480  children = 0; // just myself
481  }
482  else if ( tkn->id == INS_ADDMEMBER_ASSIGN )
483  {
484  children = 2;
485  }
486  else if ( tkn->id == INS_DICTIONARY_ADDMEMBER )
487  {
488  children = 3;
489  }
490  else if ( tkn->id == INS_MULTISUBSCRIPT )
491  {
492  children = 1 + tkn->lval;
493  }
494  else if ( tkn->id == INS_MULTISUBSCRIPT_ASSIGN )
495  {
496  children = 1 + tkn->lval;
497  }
498  else if ( tkn->type == TYP_OPERATOR ) // binary operator
499  {
500  children = 2;
501  }
502  else if ( tkn->type == TYP_UNARY_OPERATOR )
503  {
504  children = 1;
505  }
506  else if ( tkn->type == TYP_FUNC )
507  {
508  children = static_cast<int>( tkn->userfunc->parameters.size() );
509  }
510  else if ( tkn->type == TYP_METHOD )
511  {
512  children = 1 + tkn->lval;
513  }
514  else if ( tkn->type == TYP_USERFUNC )
515  {
516  // the CTRL_JSR_USERFUNC
517  // FIXME: TODO: what?
518  children = 1;
519  }
520  else if ( tkn->id == CTRL_JSR_USERFUNC )
521  {
522  // the CTRL_MAKELOCAL + the parameters
523  children = static_cast<int>( 1 + tkn->userfunc->parameters.size() );
524  }
525  else if ( tkn->id == CTRL_MAKELOCAL )
526  {
527  children = 0;
528  }
529  else
530  {
531  passert_always( 0 );
532  }
533  int count = 1;
534  for ( int i = 0; i < children; ++i )
535  {
536  count += get_num_tokens( idx - count );
537  }
538  return count;
539 }
540 
542 {
543  Token* oper = tokens[i];
544  if ( oper->type == TYP_OPERATOR && oper->id == TOK_ASSIGN )
545  {
546  int right_idx = i - 1;
547  int left_idx = right_idx - get_num_tokens( i - 1 );
548  if ( right_idx < 0 || left_idx < 0 )
549  {
550  throw std::runtime_error( "Unbalanced operator: " + Clib::tostring( *oper ) );
551  }
552  // Token* right = tokens[ right_idx ];
553  Token* left = tokens[left_idx];
554  if ( left->id == TOK_ARRAY_SUBSCRIPT )
555  {
556  oper->id = INS_SUBSCRIPT_ASSIGN;
557  oper->type = TYP_UNARY_OPERATOR;
558  oper->lval = left->lval;
559  delete left;
560  tokens.erase( tokens.begin() + left_idx, tokens.begin() + ( left_idx + 1 ) );
561  return true;
562  }
563  else if ( left->id == INS_MULTISUBSCRIPT )
564  {
566  oper->type = TYP_UNARY_OPERATOR;
567  oper->lval = left->lval;
568  delete left;
569  tokens.erase( tokens.begin() + left_idx, tokens.begin() + ( left_idx + 1 ) );
570  return true;
571  }
572  else if ( left->id == INS_GET_MEMBER )
573  {
574  oper->id = INS_SET_MEMBER;
575  oper->type = TYP_UNARY_OPERATOR;
576  oper->copyStr( left->tokval() );
577  delete left;
578  tokens.erase( tokens.begin() + left_idx, tokens.begin() + ( left_idx + 1 ) );
579  return true;
580  }
581  else if ( left->id == INS_GET_MEMBER_ID )
582  {
583  OSTRINGSTREAM os;
584  os << left->lval;
585 
586  oper->id = INS_SET_MEMBER_ID;
587  oper->type = TYP_UNARY_OPERATOR;
588  oper->copyStr( OSTRINGSTREAM_STR( os ).c_str() );
589  oper->lval = left->lval;
590  delete left;
591  tokens.erase( tokens.begin() + left_idx, tokens.begin() + ( left_idx + 1 ) );
592  return true;
593  }
594  }
595  else if ( oper->id == INS_ASSIGN_CONSUME )
596  {
597  int right_idx = i - 1;
598  int left_idx = right_idx - get_num_tokens( i - 1 );
599  // Token* right = tokens[ right_idx ];
600  Token* left = tokens[left_idx];
601  if ( left->id == TOK_LOCALVAR || left->id == TOK_GLOBALVAR )
602  {
603  // FIXME: assigning to a global/local, then consuming. we can do better, for this special
604  // case.
605  if ( left->id == TOK_LOCALVAR )
606  oper->id = INS_ASSIGN_LOCALVAR;
607  else if ( left->id == TOK_GLOBALVAR )
608  oper->id = INS_ASSIGN_GLOBALVAR;
609  oper->type = TYP_UNARY_OPERATOR;
610  oper->lval = left->lval;
611  oper->copyStr( left->tokval() );
612  delete left;
613  tokens.erase( tokens.begin() + left_idx, tokens.begin() + left_idx + 1 );
614  return true;
615  }
616  }
617  else if ( oper->id == TOK_CONSUMER )
618  {
619  Token* operand = tokens[i - 1];
620  if ( operand->id == TOK_ASSIGN )
621  {
622  operand->id = INS_ASSIGN_CONSUME;
623  delete oper;
624  tokens.pop_back();
625  return true;
626  }
627  else if ( operand->id == INS_SUBSCRIPT_ASSIGN )
628  {
629  operand->id = INS_SUBSCRIPT_ASSIGN_CONSUME;
630  delete oper;
631  tokens.pop_back();
632  return true;
633  }
634  else if ( operand->id == INS_SET_MEMBER )
635  {
636  operand->id = INS_SET_MEMBER_CONSUME;
637  delete oper;
638  tokens.pop_back();
639  return true;
640  }
641  else if ( operand->id == INS_SET_MEMBER_ID )
642  {
643  operand->id = INS_SET_MEMBER_ID_CONSUME;
644  delete oper;
645  tokens.pop_back();
646  return true;
647  }
648  }
649  else if ( oper->id == TOK_MEMBER )
650  {
651  Token* operand = tokens[i - 1];
652  if ( operand->id == TOK_STRING /*|| operand->id == INS_CALL_METHOD*/ )
653  {
654  ObjMember* objmemb = getKnownObjMember( operand->tokval() );
655  if ( objmemb != NULL && compilercfg.OptimizeObjectMembers )
656  {
657  // merge the member name with the member operator.
658  oper->id = INS_GET_MEMBER_ID;
659  oper->type = TYP_UNARY_OPERATOR;
660  oper->lval = (int)objmemb->id;
661 
662  delete operand;
663  tokens.erase( tokens.begin() + i - 1, tokens.begin() + i );
664  // 1: local #0
665  // 2: get member id 'warmode' (27)
666  // 3: 1L
667  // 4: +=
668  // 5: #
669  if ( i < 3 ) // has to be the first op
670  {
671  if ( tokens[tokens.size() - 1]->id == TOK_CONSUMER )
672  operand = tokens[tokens.size() - 2];
673  else
674  operand = tokens[tokens.size() - 1];
675  if ( operand->id == TOK_PLUSEQUAL || operand->id == TOK_MINUSEQUAL ||
676  operand->id == TOK_TIMESEQUAL || operand->id == TOK_DIVIDEEQUAL ||
677  operand->id == TOK_MODULUSEQUAL )
678  {
679  // 12: local #0
680  // 13: 1L
681  // 14: set member id 'warmode' (27) += #
682  if ( tokens[tokens.size() - 1]->id == TOK_CONSUMER )
683  tokens.pop_back(); // delete consumer
684 
685  if ( operand->id == TOK_PLUSEQUAL )
687  else if ( operand->id == TOK_MINUSEQUAL )
689  else if ( operand->id == TOK_TIMESEQUAL )
691  else if ( operand->id == TOK_DIVIDEEQUAL )
693  else if ( operand->id == TOK_MODULUSEQUAL )
695  delete operand;
696  tokens.pop_back(); // delete +=
697  tokens.erase( tokens.begin() + i - 1, tokens.begin() + i ); // remove setmember
698  tokens.insert( tokens.end(), oper ); // and append it
699  OSTRINGSTREAM os;
700  os << oper->lval;
701  oper->copyStr( OSTRINGSTREAM_STR( os ).c_str() );
702  }
703  }
704  return true;
705  }
706  else
707  {
708  // merge the member name with the member operator.
709  oper->id = INS_GET_MEMBER;
710  oper->type = TYP_UNARY_OPERATOR;
711  oper->copyStr( operand->tokval() );
712 
713  delete operand;
714  tokens.erase( tokens.begin() + i - 1, tokens.begin() + i );
715  return true;
716  }
717  }
718  else
719  {
720  throw std::runtime_error( "Expected an identifier to follow the member (.) operator." );
721  }
722  }
723 
724  return false;
725 }
727 {
728  for ( unsigned i = 1; i < tokens.size(); i++ )
729  {
730  if ( optimize_token( i ) )
731  return;
732  }
733 }
734 
735 /*
736  TODO:
737  add options to:
738  a) disable all optimization
739  b) disable optimization on doubles
740  OPTIMIZATIONS PERFORMED:
741  Constant Long Arithmetic: (+ - * /)
742  5 + 7 -> 5 7 + -> 12
743  Constant Double Arithmetic: (+ - * /)
744  5.5 + 6.5 -> 5.5 6.5 + -> 12.0
745  String Concatenation: (+)
746  "hello" + " world" -> "hello" " world" + -> "hello world"
747  OPTIMIZATIONS TO BE CONSIDERED:
748  More efficient constructs: (+=, -=, *=, /=)
749  A := A + 5 -> A A 5 + := -> A 5 +=
750  Note, this is harder for things like:
751  A[5] := A[5] + 4;
752  which requires reversing your way up the expression
753  and searching for like operands.
754 
755  Optimizations that must take place at a different level:
756  0 IFFALSE -> GOTO
757  1 IFFALSE -> NOP
758  0 IF -> NOP
759  1 IF -> GOTO
760  34: GOTO 56
761  56: GOTO 78 --> 34: GOTO 78 56: GOTO 78
762  NOP -> (null) (compress NOPs)
763  */
765 {
766  size_t starting_size;
767  do
768  {
769  starting_size = tokens.size();
770  optimize_binary_operations();
771  optimize_unary_operations();
772  optimize_assignments();
773 
774  } while ( tokens.size() != starting_size );
775 }
776 
777 void Compiler::enterblock( eb_label_ok eblabel, eb_break_ok ebbreak, eb_continue_ok ebcontinue )
778 {
779  program->enterblock();
780 
781  BlockDesc& bd = localscope.pushblock();
782  bd.varcount = 0;
783  bd.label_ok = eblabel;
784  bd.break_ok = ebbreak;
785  bd.continue_ok = ebcontinue;
786 
787  if ( bd.label_ok == CanBeLabelled )
788  {
789  bd.label = latest_label;
790  latest_label = "";
791  }
792  else
793  {
794  bd.label = "";
795  }
796 }
798 {
799  enterblock( et, et ? BreakOk : BreakNotOk, et ? ContinueOk : ContinueNotOk );
800 }
801 
802 void Compiler::patchblock_breaks( unsigned breakPC )
803 {
804  // now, patch up the GOTO part of BREAK statements.
805  // they each have a LEAVE_BLOCK appropriate to where they are.
806  const BlockDesc& bd = localscope.blockdesc();
807  for ( auto patchip : bd.break_tokens )
808  {
809  patchoffset( patchip, breakPC ); // program->tokens.next()
810  }
811 }
812 
813 void Compiler::patchblock_continues( unsigned continuePC )
814 {
815  const BlockDesc& bd = localscope.blockdesc();
816  for ( auto patchip : bd.continue_tokens )
817  {
818  patchoffset( patchip, continuePC );
819  }
820 }
821 
823 {
824  if ( localscope.numVarsInBlock() )
825  { // local variables were declared in this scope. We need to kill 'em.
826  program->append(
827  StoredToken( Mod_Basic, CTRL_LEAVE_BLOCK, TYP_CONTROL, localscope.numVarsInBlock() ) );
828  }
829 }
830 
831 void Compiler::leaveblock( unsigned breakPC, unsigned continuePC )
832 {
833  emit_leaveblock();
834  patchblock_breaks( breakPC );
835  patchblock_continues( continuePC );
836 
837  localscope.popblock();
838  program->leaveblock();
839 }
840 
842  : SmartParser(),
843  current_file_path( "" ),
844  curSourceFile( 0 ),
845  inExpr( 0 ),
846  inFunction( 0 ),
847  haveProgram( false ),
848  compiling_include( false ),
849  programPos( 0 ),
850  nProgramArgs( 0 ),
851  program_ctx(),
852  program_source( NULL ),
853  included(),
854  referencedPathnames(),
855  program( new EScriptProgram )
856 {
857  setQuiet( 1 );
858  err = PERR_NONE;
859 }
860 
862 {
863  while ( !delete_these_arrays.empty() )
864  {
865  char* s = delete_these_arrays.back();
866  delete[] s;
867  delete_these_arrays.pop_back();
868  }
869 }
870 
871 
872 bool Compiler::globalexists( const std::string& varname, unsigned& idx, CompilerContext* ctx ) const
873 {
874  for ( unsigned i = 0; i < static_cast<unsigned>( globals_.size() ); ++i )
875  {
876  if ( Clib::stringicmp( varname, globals_[i].name ) == 0 )
877  {
878  idx = i;
879  if ( ctx )
880  *ctx = globals_[i].ctx;
881  return true;
882  }
883  }
884  return false;
885 }
886 
887 bool Compiler::varexists( const std::string& varname ) const
888 {
889  unsigned idx;
890  if ( localscope.varexists( varname, idx ) )
891  return true;
892 
893  if ( globalexists( varname, idx ) )
894  return true;
895 
896  return false;
897 }
898 
900 {
901  if ( inExpr && ( token.id == TOK_ASSIGN ) )
902  {
903  if ( verbosity_level_ >= 5 )
904  {
905  INFO_PRINT << "Warning! possible incorrect assignment.\n"
906  << "Near: " << curLine << "\n";
908  throw std::runtime_error( "Warnings treated as errors." );
909  }
910  }
911 
912  return 1; // assignments valid everywhere. back to simple parser
913 }
914 
915 struct Candidate
916 {
917  Candidate( int module, int funcidx ) : module( module ), funcidx( funcidx ), modfunc( nullptr ) {}
918  int module;
919  int funcidx;
921 };
922 
924 {
925  typedef std::vector<Candidate> Candidates;
926  Candidates candidates;
927  std::string modulename;
928  std::string funcname;
929 
930  if ( const char* colon = strchr( token.tokval(), ':' ) )
931  {
932  std::string tmp( token.tokval(), colon );
933  if ( tmp.length() >= 9 )
934  {
935  INFO_PRINT << "'" << tmp << "' is too long to be a module name.\n";
936  return -1;
937  }
938  modulename = tmp;
939  funcname = std::string( colon + 2 );
940  }
941  else
942  {
943  modulename = "";
944  funcname = token.tokval();
945  }
946 
947  for ( unsigned i = 0; i < program->modules.size(); i++ )
948  {
949  if ( !modulename.empty() &&
950  !( modulename == program->modules[i]->modulename ) ) // STLport doesn't like != here
951  {
952  continue;
953  }
954 
955  int funcidx;
956  if ( program->modules[i]->isFunc( funcname.c_str(), pmf, &funcidx ) )
957  {
958  candidates.push_back( Candidate( i, funcidx ) );
959  }
960  }
961 
962  if ( candidates.empty() )
963  {
964  return 0;
965  }
966  else if ( candidates.size() == 1 )
967  {
968  token.module =
969  static_cast<unsigned char>( candidates[0].module ); // WAS module,we're using relative now
970  token.type = TYP_FUNC;
971  token.id = TOK_FUNC;
972  token.lval = candidates[0].funcidx;
973  token.userfunc = ( *pmf )->uf;
974  return 1;
975  }
976  else
977  {
978  INFO_PRINT << "Function '" << funcname
979  << "' exists in more than module. It must be qualified.\n";
980  for ( Candidates::const_iterator itr = candidates.begin(); itr != candidates.end(); ++itr )
981  {
982  INFO_PRINT << "\t" << program->modules[itr->module]->modulename.get() << "\n";
983  }
984 
985  return -1;
986  }
987 }
988 
990 {
991  module->fillFunctionsByName();
992  program->modules.push_back( module );
993 }
994 
996 {
997  if ( token.id != TOK_IDENT )
998  return 0;
999  passert( token.tokval() );
1000 
1001  auto itr = userFunctions.find( token.tokval() );
1002  if ( itr != userFunctions.end() )
1003  {
1004  token.module = Mod_Basic;
1005  token.type = TYP_USERFUNC;
1006  token.id = TOK_USERFUNC;
1007  token.userfunc = &( *itr ).second;
1008  *f = &( *itr ).second;
1009  return 1;
1010  }
1011  return 0;
1012 }
1014 {
1015  int res;
1016  Token token;
1017  res = peekToken( ctx, token );
1018  if ( res < 0 )
1019  return res;
1020  if ( token.id != TOK_LPAREN )
1021  return 0;
1022  getToken( ctx, token );
1023 
1024  // it's valid to have a right-paren immediately following, so check for that:
1025 
1026  res = peekToken( ctx, token );
1027  if ( res < 0 )
1028  return res;
1029  if ( token.id == TOK_RPAREN )
1030  {
1031  getToken( ctx, token );
1032  return 0;
1033  }
1034 
1035 
1036  for ( ;; )
1037  {
1038  res = peekToken( ctx, token );
1039  if ( res < 0 )
1040  return res;
1041  /*
1042  if we get an rparen HERE, it means the script has something like
1043  var x := array ( 2, 3, );
1044  report this as an error.
1045  */
1046  if ( token.id == TOK_RPAREN )
1047  {
1048  INFO_PRINT
1049  << "Expected expression following comma before right-brace in array initializer list\n";
1050  return -1;
1051  }
1052  if ( token.id == TOK_COMMA )
1053  {
1054  INFO_PRINT << "Unexpected comma in array initializer list\n";
1055  return -1;
1056  }
1057  Expression eex;
1059  if ( res < 0 )
1060  return res;
1061 
1062  expr.eat2( eex );
1063 
1064  expr.CA.push( new Token( TOK_INSERTINTO, TYP_OPERATOR ) );
1065 
1066  res = getToken( ctx, token );
1067  if ( res < 0 )
1068  return res;
1069  if ( token.id == TOK_COMMA )
1070  {
1071  continue;
1072  }
1073  else if ( token.id == TOK_RPAREN )
1074  {
1075  return 0;
1076  ;
1077  }
1078  else
1079  {
1080  INFO_PRINT << "Token '" << token << "' unexpected in array initializer list\n";
1081  return -1;
1082  }
1083  }
1084 }
1085 
1087 {
1088  int res;
1089  Token token;
1090  // the left-brace has already been eaten
1091 
1092  // it's valid to have a right-brace immediately following, so check for that:
1093 
1094  res = peekToken( ctx, token );
1095  if ( res < 0 )
1096  return res;
1097  if ( token.id == TOK_RBRACE )
1098  {
1099  getToken( ctx, token );
1100  return 0;
1101  }
1102 
1103  for ( ;; )
1104  {
1105  res = peekToken( ctx, token );
1106  if ( res < 0 )
1107  return res;
1108 
1109  // if we get an rbrace HERE, it means the script has something like
1110  // var x := array { 2, 3, };
1111  // report this as an error.
1112  if ( token.id == TOK_RBRACE )
1113  {
1114  INFO_PRINT
1115  << "Expected expression following comma before right-brace in array initializer list\n";
1116  return -1;
1117  }
1118  // we're expecting an expression, not a comma, at this point
1119  if ( token.id == TOK_COMMA )
1120  {
1121  INFO_PRINT << "Unexpected comma in array initializer list\n";
1122  return -1;
1123  }
1124  Expression eex;
1126  if ( res < 0 )
1127  return res;
1128 
1129  expr.eat2( eex );
1130 
1131  expr.CA.push( new Token( TOK_INSERTINTO, TYP_OPERATOR ) );
1132 
1133  // the element can be followed by a comma, or by a rightbrace. eat either.
1134  res = getToken( ctx, token );
1135  if ( res < 0 )
1136  return res;
1137  if ( token.id == TOK_COMMA )
1138  {
1139  continue;
1140  }
1141  else if ( token.id == TOK_RBRACE )
1142  {
1143  return 0;
1144  }
1145  else
1146  {
1147  INFO_PRINT << "Token '" << token << "' unexpected in array initializer list\n";
1148  return -1;
1149  }
1150  }
1151 }
1152 
1154 {
1155  int res;
1156  Token token;
1157  res = peekToken( ctx, token );
1158  if ( res < 0 )
1159  return res;
1160  if ( token.id != TOK_LBRACE )
1161  return 0;
1162  getToken( ctx, token );
1163 
1164  // it's valid to have a right-brace immediately following, so check for that:
1165  res = peekToken( ctx, token );
1166  if ( res < 0 )
1167  return res;
1168  if ( token.id == TOK_RBRACE )
1169  {
1170  getToken( ctx, token );
1171  return 0;
1172  }
1173 
1174 
1175  for ( ;; )
1176  {
1177  res = peekToken( ctx, token );
1178  if ( res < 0 )
1179  return res;
1180  // if we get an rbrace HERE, it means the script has something like
1181  // var x := struct { x, y, };
1182  // report this as an error.
1183  if ( token.id == TOK_RBRACE )
1184  {
1185  INFO_PRINT
1186  << "Expected expression following comma before right-brace in struct initializer list\n";
1187  return -1;
1188  }
1189  if ( token.id == TOK_COMMA )
1190  {
1191  INFO_PRINT << "Unexpected comma in struct element list\n";
1192  return -1;
1193  }
1194 
1195  if ( token.id == TOK_IDENT || token.id == TOK_STRING )
1196  {
1197  Token ident_tkn;
1198  Parser::getToken( ctx, ident_tkn );
1199 
1200  res = peekToken( ctx, token );
1201  if ( token.id == TOK_ASSIGN )
1202  {
1203  getToken( ctx, token );
1204  // something like struct { a := 5 };
1205  Expression eex;
1206  res =
1208  if ( res < 0 )
1209  return res;
1210 
1211  auto addmem = new Token( ident_tkn );
1212  addmem->id = INS_ADDMEMBER_ASSIGN;
1213 
1214  expr.eat2( eex );
1215  expr.CA.push( addmem );
1216  }
1217  else if ( token.id == TOK_EQUAL1 )
1218  {
1219  INFO_PRINT << "Unexpected token: '" << token << "'. Did you mean := for assign?\n";
1220  return -1;
1221  }
1222  else
1223  {
1224  auto addmem = new Token( ident_tkn );
1225  addmem->id = INS_ADDMEMBER2;
1226  expr.CA.push( addmem );
1227  }
1228  }
1229  else
1230  {
1231  INFO_PRINT << "Unexpected token in struct initializer list: " << token << "\n";
1232  return -1;
1233  }
1234 
1235 
1236  res = getToken( ctx, token );
1237  if ( res < 0 )
1238  return res;
1239  if ( token.id == TOK_COMMA )
1240  {
1241  continue;
1242  }
1243  else if ( token.id == TOK_RBRACE )
1244  {
1245  return 0;
1246  }
1247  else
1248  {
1249  INFO_PRINT << "Token '" << token << "' unexpected in struct initializer list\n";
1250  return -1;
1251  }
1252  }
1253  // unreachable
1254 }
1255 
1257 {
1258  int res;
1259  Token token;
1260  res = peekToken( ctx, token );
1261  if ( res < 0 )
1262  return res;
1263  if ( token.id != TOK_LBRACE )
1264  return 0;
1265  getToken( ctx, token );
1266 
1267  // it's valid to have a right-brace immediately following, so check for that:
1268  res = peekToken( ctx, token );
1269  if ( res < 0 )
1270  return res;
1271  if ( token.id == TOK_RBRACE )
1272  {
1273  getToken( ctx, token );
1274  return 0;
1275  }
1276 
1277 
1278  for ( ;; )
1279  {
1280  res = peekToken( ctx, token );
1281  if ( res < 0 )
1282  return res;
1283  // if we get an rbrace HERE, it means the script has something like
1284  // var x := dictionary { "x", "y", };
1285  // report this as an error.
1286  if ( token.id == TOK_RBRACE )
1287  {
1288  INFO_PRINT << "Expected expression following comma before right-brace in dictionary "
1289  "initializer list\n";
1290  return -1;
1291  }
1292  if ( token.id == TOK_COMMA )
1293  {
1294  INFO_PRINT << "Unexpected comma in dictionary element list\n";
1295  return -1;
1296  }
1297 
1298 
1299  // first get the key expression.
1300 
1301  Expression key_expression;
1302  res = readexpr( key_expression, ctx,
1305  if ( res < 0 )
1306  return res;
1307 
1308  expr.eat2( key_expression );
1309 
1310  // if the key is followed by "->", then grab the value
1311  res = peekToken( ctx, token );
1312  if ( res < 0 )
1313  return res;
1314  if ( token.id == TOK_DICTKEY )
1315  {
1316  getToken( ctx, token );
1317  // get the value expression
1318 
1319  Expression value_expression;
1320  res = readexpr( value_expression, ctx,
1322  if ( res < 0 )
1323  return res;
1324 
1325  expr.eat2( value_expression );
1326  }
1327  else
1328  {
1329  // an uninit
1330  expr.CA.push( new Token( INS_UNINIT, TYP_OPERAND ) );
1331  }
1332 
1333  expr.CA.push( new Token( INS_DICTIONARY_ADDMEMBER, TYP_OPERATOR ) );
1334 
1335  res = getToken( ctx, token );
1336  if ( res < 0 )
1337  return res;
1338  if ( token.id == TOK_COMMA )
1339  {
1340  continue;
1341  }
1342  else if ( token.id == TOK_RBRACE )
1343  {
1344  return 0;
1345  }
1346  else
1347  {
1348  INFO_PRINT << "Token '" << token << "' unexpected in struct element list\n";
1349  return -1;
1350  }
1351  }
1352  // unreachable
1353 }
1354 
1356 {
1357  int res;
1358  Token token;
1359  nargs = 0;
1360  res = getToken( ctx, token );
1361  passert( token.id == TOK_LPAREN );
1362  passert( res == 0 );
1363  for ( ;; )
1364  {
1365  res = peekToken( ctx, token );
1366  if ( res < 0 )
1367  return res;
1368  if ( token.id == TOK_RPAREN )
1369  {
1370  getToken( ctx, token );
1371  return 0;
1372  }
1373  if ( token.id == TOK_COMMA )
1374  {
1375  INFO_PRINT << "Unexpected comma in array element list\n";
1376  return -1;
1377  }
1378  Expression eex;
1380  if ( res < 0 )
1381  return res;
1382 
1383  expr.eat2( eex );
1384 
1385  ++nargs;
1386 
1387  res = getToken( ctx, token );
1388  if ( res < 0 )
1389  return res;
1390  if ( token.id == TOK_COMMA )
1391  {
1392  continue;
1393  }
1394  else if ( token.id == TOK_RPAREN )
1395  {
1396  return 0;
1397  }
1398  else
1399  {
1400  INFO_PRINT << "Token '" << token << "' unexpected in array element list\n";
1401  return -1;
1402  }
1403  }
1404  // unreachable
1405 }
1406 
1408 {
1409  int res;
1410  Token token;
1411  res = getToken( ctx, *ref_tkn );
1412  if ( res < 0 || ref_tkn->id != TOK_USERFUNC )
1413  {
1414  INFO_PRINT << "Expected user function reference.\n";
1415  return -1;
1416  }
1417  ref_tkn->id = TOK_FUNCREF;
1418  ref_tkn->type = TYP_OPERAND;
1419  return 0;
1420 }
1421 
1422 /*
1423 getUserArgs is called from IIP.
1424 It is being converted from calling IIP (and thus stuffing
1425 expression data onto ex) to calling readexpr() and
1426 appending the argument expressions onto the passed
1427 ex object.
1428 
1429 Overall function:
1430 Read the parameters actually passed.
1431 Append expressions for parameters to passed expression.
1432 Append a JSR instruction
1433 */
1434 
1435 /*
1436 class ParamPassed
1437 {
1438 public:
1439 ParamPassed( const std::string& name ) : name(name) {}
1440 
1441 string name;
1442 Expression expr;
1443 };
1444 */
1445 
1446 int Compiler::getUserArgs( Expression& ex, CompilerContext& ctx, bool inject_jsr )
1447 {
1448  int res;
1449  Token token;
1450 
1451  typedef std::map<std::string, Expression> ParamList;
1452  ParamList params_passed;
1453  // std::vector<std::string> func_params;
1454 
1455  int any_named = 0;
1456  UserFunction* userfunc = userfunc_;
1457 
1458  getToken( ctx, token );
1459  if ( token.id != TOK_LPAREN )
1460  {
1461  INFO_PRINT << "Expected '(' after function name '" << userfunc->name << "'\n";
1462  res = -1;
1463  err = PERR_MISSLPAREN;
1464  return -1;
1465  }
1466 
1467  for ( ;; )
1468  {
1469  std::string varname;
1470  Token tk;
1471 
1472  CompilerContext tctx( ctx );
1473 
1474  res = getToken( tctx, tk );
1475  if ( res < 0 )
1476  return res;
1477  if ( tk.id == TOK_RPAREN )
1478  {
1479  if ( params_passed.empty() )
1480  {
1481  break;
1482  }
1483  else
1484  {
1485  INFO_PRINT << "right paren not allowed here\n";
1486  return -1;
1487  }
1488  }
1489 
1490  if ( params_passed.size() >= userfunc->parameters.size() )
1491  {
1492  INFO_PRINT << "Too many parameters passed to " << userfunc->name << "\n";
1493  return -1;
1494  }
1495 
1496  if ( tk.id == TOK_IDENT )
1497  {
1498  Token tk2;
1499  res = getToken( tctx, tk2 );
1500  if ( res < 0 )
1501  return res;
1502  if ( tk2.id == TOK_ASSIGN )
1503  {
1504  any_named = 1;
1505  varname = tk.tokval();
1506  ctx = tctx; /* skip past the 'variable :=' part */
1507  }
1508  else if ( tk2.id == TOK_EQUAL1 )
1509  {
1510  INFO_PRINT << "Unexpected token: '" << tk2 << "'. Did you mean := for assign?\n";
1511  return -1;
1512  }
1513  }
1514  if ( varname == "" )
1515  {
1516  if ( any_named )
1517  {
1518  INFO_PRINT << "unnamed args cannot follow named args\n";
1519  return -1;
1520  }
1521  varname = userfunc->parameters[params_passed.size()].name;
1522  }
1523  // FIXME case sensitivity!
1524  if ( params_passed.find( varname ) != params_passed.end() )
1525  {
1526  INFO_PRINT << "Variable " << varname << " passed more than once to " << userfunc->name
1527  << "\n";
1528  return -1;
1529  }
1530 
1531  Expression& arg_expr = params_passed[varname];
1532 
1534  if ( res < 0 )
1535  return res;
1536 
1537  Token tmp;
1538  res = peekToken( ctx, tmp );
1539  if ( res )
1540  return res;
1541  if ( tmp.id == TOK_COMMA )
1542  {
1543  getToken( ctx, tmp );
1544  }
1545  else if ( tmp.id == TOK_RPAREN )
1546  {
1547  break;
1548  }
1549  else
1550  {
1551  INFO_PRINT << "Token '" << token << "' unexpected (expected comma or right-paren)\n";
1552  return -1;
1553  }
1554  }
1555 
1556  for ( UserFunction::Parameters::const_iterator itr = userfunc->parameters.begin();
1557  itr != userfunc->parameters.end(); ++itr )
1558  {
1559  if ( params_passed.find( itr->name ) == params_passed.end() ) // not passed
1560  {
1561  if ( itr->have_default )
1562  {
1563  ex.CA.push( new Token( itr->dflt_value ) );
1564  }
1565  else
1566  {
1567  INFO_PRINT << "Function " << userfunc->name << ": Parameter " << itr->name
1568  << " was not passed, and there is no default.\n";
1569  return -1;
1570  }
1571  }
1572  else
1573  {
1574  Expression& arg_expr = params_passed[itr->name];
1575  ex.eat( arg_expr );
1576  params_passed.erase( itr->name );
1577  }
1578  }
1579 
1580  if ( !params_passed.empty() )
1581  {
1582  for ( const auto& elem : params_passed )
1583  {
1584  INFO_PRINT << "Parameter '" << elem.first << "' passed by name to " << userfunc->name
1585  << ", which takes no such parameter.\n";
1586  }
1587 
1588  return -1;
1589  }
1590  passert( params_passed.empty() );
1591 
1592  getToken( ctx, token );
1593  if ( token.id != TOK_RPAREN )
1594  {
1595  res = -1;
1596  err = PERR_MISSRPAREN;
1597  return -1;
1598  }
1599 
1600  if ( inject_jsr )
1601  {
1602  auto t = new Token( CTRL_MAKELOCAL, TYP_CONTROL );
1603  t->dbg_filenum = ctx.dbg_filenum;
1604  t->dbg_linenum = ctx.line;
1605  ex.CA.push( t );
1606  t = new Token( Mod_Basic, CTRL_JSR_USERFUNC, TYP_CONTROL, userfunc );
1607  t->dbg_filenum = ctx.dbg_filenum;
1608  t->dbg_linenum = ctx.line;
1609  ex.CA.push( t );
1610  }
1611 
1612  return 0;
1613 }
1614 
1616 {
1617  program->addToken( token );
1618 }
1619 
1621 {
1622  for ( auto& tkn : expr.tokens )
1623  {
1624  if ( tkn->id == TOK_IDENT )
1625  {
1626  unsigned idx;
1627  if ( localscope.varexists( tkn->tokval(), idx ) )
1628  {
1629  tkn->id = TOK_LOCALVAR;
1630  tkn->lval = idx;
1631  }
1632  else if ( globalexists( tkn->tokval(), idx ) )
1633  {
1634  tkn->id = TOK_GLOBALVAR;
1635  tkn->lval = idx;
1636  }
1637  }
1638  }
1639 }
1640 
1641 
1642 int Compiler::validate( const Expression& expr, CompilerContext& ctx ) const
1643 {
1644  for ( unsigned i = 0; i < static_cast<unsigned>( expr.tokens.size() ); i++ )
1645  {
1646  Token* tkn = expr.tokens[i];
1647 
1648  if ( tkn->id == TOK_IDENT )
1649  {
1650  if ( !varexists( tkn->tokval() ) )
1651  {
1652  INFO_PRINT << "Variable " << tkn->tokval() << " has not been declared"
1653  << " on line " << ctx.line << ".\n";
1654  return -1;
1655  }
1656  }
1657  else if ( tkn->id == TOK_EQUAL1 )
1658  {
1659  // Single '=' sign? Special error statement (since it could be a typo?)
1660  INFO_PRINT << "Deprecated '=' found: did you mean '==' or ':='?\n";
1661  return -1;
1662  }
1663 
1665  {
1666  INFO_PRINT << "Warning: Found deprecated "
1667  << ( tkn->type == TYP_OPERATOR ? "operator " : "token " ) << "'" << tkn->tokval()
1668  << "'"
1669  << " on line " << ctx.line << " of " << ctx.filename << "\n";
1670  // warning only; doesn't bail out.
1672  throw std::runtime_error( "Warnings treated as errors." );
1673  }
1674 
1675  if ( tkn->type == TYP_OPERATOR )
1676  {
1677  int right_idx = i - 1;
1678  if ( right_idx < 0 )
1679  {
1680  throw std::runtime_error( "Unbalanced operator: " + Clib::tostring( *tkn ) );
1681  }
1682 
1683  int left_idx = right_idx - expr.get_num_tokens( i - 1 );
1684  if ( left_idx < 0 )
1685  {
1686  throw std::runtime_error( "Unbalanced operator: " + Clib::tostring( *tkn ) );
1687  }
1688  }
1689 
1690  if ( tkn->type == TYP_UNARY_OPERATOR )
1691  {
1692  int operand_idx = i - 1;
1693  if ( operand_idx < 0 )
1694  {
1695  throw std::runtime_error( "Unbalanced operator: " + Clib::tostring( *tkn ) );
1696  }
1697  }
1698  }
1699 
1700  return 0;
1701 }
1702 
1704 {
1705  auto srch = constants.find( tkn->tokval() );
1706  if ( srch != constants.end() )
1707  {
1708  int dbg_filenum = tkn->dbg_filenum;
1709  int dbg_linenum = tkn->dbg_linenum;
1710  *tkn = ( *srch ).second;
1711  tkn->dbg_filenum = dbg_filenum;
1712  tkn->dbg_linenum = dbg_linenum;
1713  return true;
1714  }
1715  else
1716  {
1717  return false;
1718  }
1719 }
1720 
1722 {
1723  for ( auto& tkn : expr.tokens )
1724  {
1725  if ( tkn->id == TOK_IDENT )
1726  substitute_constant( tkn );
1727  }
1728 }
1729 
1730 int Compiler::readexpr( Expression& expr, CompilerContext& ctx, unsigned flags )
1731 {
1732  int res;
1733  reinit( expr );
1734  res = IIP( expr, ctx, flags );
1735  if ( res != 1 )
1736  return res;
1737  while ( !expr.CA.empty() )
1738  {
1739  Token* token = expr.CA.front();
1740  expr.CA.pop();
1741  expr.tokens.push_back( token );
1742  }
1743  if ( ( flags & EXPR_FLAG_CONSUME_RESULT ) && !expr.tokens.empty() )
1744  {
1745  auto tkn = new Token( TOK_CONSUMER, TYP_UNARY_OPERATOR );
1746  expr.tokens.push_back( tkn );
1747  }
1748  substitute_constants( expr );
1749  convert_variables( expr );
1750  expr.optimize();
1751  res = validate( expr, ctx );
1752  if ( res != 0 )
1753  return -1;
1754  return 1;
1755 }
1756 
1758 {
1759  for ( Expression::Tokens::const_iterator itr = expr.tokens.begin(); itr != expr.tokens.end();
1760  ++itr )
1761  {
1762  addToken( *( *itr ) );
1763  }
1764 }
1765 
1766 int Compiler::getExpr( CompilerContext& ctx, unsigned flags, size_t* exprlen, Expression* pex )
1767 {
1768  int res;
1769  if ( pex )
1770  {
1771  res = readexpr( *pex, ctx, flags );
1772  if ( exprlen != NULL )
1773  *exprlen = pex->tokens.size();
1774  inject( *pex );
1775  }
1776  else
1777  {
1778  Expression ex;
1779  res = readexpr( ex, ctx, flags );
1780  if ( exprlen != NULL )
1781  *exprlen = ex.tokens.size();
1782  inject( ex );
1783  }
1784  return res;
1785 }
1786 
1787 int Compiler::getExpr2( CompilerContext& ctx, unsigned expr_flags, Expression* pex )
1788 {
1789  int orig_inExpr;
1790  int res;
1791  orig_inExpr = inExpr;
1792  inExpr = 1;
1793  res = getExpr( ctx, expr_flags, NULL, pex );
1794  inExpr = orig_inExpr;
1795  return res;
1796 }
1797 
/*
getExpr3
Get an expression, which must be enclosed in parentheses.
*/
1803 {
1804  Token token;
1805  getToken( ctx, token );
1806  if ( token.id != TOK_LPAREN )
1807  {
1808  err = PERR_MISSLPAREN;
1809  return -1;
1810  }
1811 
1812  int res = getExpr2( ctx, EXPR_FLAG_RIGHTPAREN_TERM_ALLOWED, pex );
1813  if ( res < 0 )
1814  return res;
1815 
1816  err = PERR_NONE;
1817  getToken( ctx, token );
1818  if ( token.id != TOK_RPAREN )
1819  {
1820  err = PERR_MISSRPAREN;
1821  return -1;
1822  }
1823  return 0;
1824 }
1825 
1827 {
1828  Token token;
1829  int res = peekToken( ctx, token );
1830  if ( res < 0 )
1831  return res;
1832  if ( token.id == TOK_LPAREN )
1833  {
1834  res = getExprInParens( ctx );
1835  if ( res < 0 )
1836  return res;
1837  }
1838  else if ( token.id == TOK_IDENT || token.id == TOK_FUNC || token.id == TOK_USERFUNC ||
1839  token.id == TOK_ARRAY || token.id == TOK_LBRACE )
1840  {
1841  res = getExpr2( ctx, EXPR_FLAG_SINGLE_ELEMENT );
1842  if ( res < 0 )
1843  return res;
1844  }
1845  else
1846  {
1847  INFO_PRINT << "Expected variable, function or parenthesized expression, got '" << token
1848  << "'\n";
1849  return -1;
1850  }
1851  return 0;
1852 }
1853 
1855 {
1856  Token token;
1857  int res = getToken( ctx, token );
1858  if ( res < 0 )
1859  {
1860  INFO_PRINT << ctx << "Error reading token, expected " << Token( tokenid, TYP_RESERVED ) << "\n";
1861  return res;
1862  }
1863 
1864  if ( token.id != tokenid )
1865  {
1866  INFO_PRINT << ctx << "Expected " << Token( tokenid, TYP_RESERVED ) << ", got " << token << "\n";
1867  return -1;
1868  }
1869  return 0;
1870 }
1871 
1873 {
1874  if ( !quiet )
1875  INFO_PRINT << "DO clause..\n";
1876  StoredTokenContainer* prog_tokens = &program->tokens;
1877  unsigned body_start = prog_tokens->next();
1879 
1880  Token endblock_tkn;
1881  int res;
1882  res = readblock( ctx, level, RSV_DOWHILE, NULL, &endblock_tkn );
1883  if ( res < 0 )
1884  return res;
1885 
1886  emit_leaveblock();
1887 
1888  program->update_dbg_pos( endblock_tkn );
1889 
1890  // continue should jump to where the WHILE expression is evaluated,
1891  // which is the next token after this block
1892  patchblock_continues( prog_tokens->next() );
1893  program->setstatementbegin();
1894 
1895  localscope.popblock( true ); // Pop only variables.
1897  if ( res < 0 )
1898  return res;
1899  program->append( StoredToken( Mod_Basic, RSV_JMPIFTRUE, TYP_RESERVED, body_start ) );
1900  // break should completely exit, of course.
1901  patchblock_breaks( prog_tokens->next() );
1902  localscope.popblock(); // Pop block.
1903 
1904  // do-while loops until its expression evaluates to false.
1905 
1906 
1907  program->leaveblock();
1908 
1909  return 0;
1910 }
1911 
1912 
1914 {
1915  if ( !quiet )
1916  INFO_PRINT << "REPEAT clause..\n";
1917  StoredTokenContainer* prog_tokens = &program->tokens;
1918  unsigned body_start = prog_tokens->next();
1920 
1921  Token endblock_tkn;
1922  int res;
1923  res = readblock( ctx, level, RSV_UNTIL, NULL, &endblock_tkn );
1924  if ( res < 0 )
1925  {
1926  return res;
1927  }
1928 
1929  emit_leaveblock();
1930 
1931  program->update_dbg_pos( endblock_tkn );
1932  // continue should jump to where the UNTIL expression is evaluated.
1933  patchblock_continues( prog_tokens->next() );
1934  program->setstatementbegin();
1935  localscope.popblock( true );
1937  if ( res < 0 )
1938  return res;
1939  // repeat-until loops until its expression evaluates to true.
1940  program->append( StoredToken( Mod_Basic, RSV_JMPIFFALSE, TYP_RESERVED, body_start ) );
1941  // break should completely exit, of course.
1942  patchblock_breaks( prog_tokens->next() );
1943  localscope.popblock();
1944 
1945 
1946  program->leaveblock();
1947 
1948  return 0;
1949 }
1950 
1952 {
1953  int res = getExprInParens( ctx ); // (expr) (parens required)
1954  if ( res < 0 )
1955  return res;
1956 
1957  unsigned default_posn = 0;
1958  unsigned casecmp_posn = 0;
1959  std::vector<unsigned> jmpend;
1960  std::vector<unsigned char> caseblock;
1961 
1962  program->append( StoredToken( Mod_Basic, INS_CASEJMP, TYP_RESERVED, 0 ), &casecmp_posn );
1963 
1964  // overview:
1965  // we grab some case OPTIONs,
1966  // then we grab some code.
1967  // until we peek-see an OPTION or an ENDCASE
1968  bool done = false;
1969  bool onlydefault = true;
1970  StoredTokenContainer* prog_tokens = &program->tokens;
1971  while ( !done )
1972  {
1973  bool anycases = false;
1974  for ( ;; )
1975  {
1976  Token token;
1977  res = peekToken( ctx, token );
1978  if ( res < 0 )
1979  return res;
1980  if ( token.id == TOK_LONG || token.id == CTRL_LABEL || token.id == TOK_STRING )
1981  {
1982  /*
1983  Okay, some trickery. First, we'll handle the 'default' label, 'cause
1984  it isn't tricky.
1985  The complication is that sometimes a label is actually a constant.
1986  */
1987  if ( token.id == CTRL_LABEL && stricmp( token.tokval(), "default" ) == 0 )
1988  {
1989  getToken( ctx, token );
1990  if ( default_posn != 0 )
1991  {
1992  INFO_PRINT << "CASE statement can have only one DEFAULT clause.\n";
1993  return -1;
1994  }
1995  default_posn = prog_tokens->next();
1996  anycases = true;
1997  continue;
1998  }
1999 
2000  /*
2001  A label may be a constant, say CONST_VAR: If it is, substitute that value.
2002  */
2003  if ( token.id == CTRL_LABEL )
2004  {
2005  substitute_constant( &token );
2006  /*
2007  A label that wasn't a constant can't be used in a CASE statement.
2008  If it's followed by a while, etc, it won't be flagged illegal, though.
2009  this may be a Bad Thing.
2010  */
2011  if ( token.id == CTRL_LABEL )
2012  {
2013  break;
2014  }
2015  Token dummy; // don't overwrite the token we just substituted
2016  getToken( ctx, dummy );
2017  }
2018  else if ( token.id == TOK_LONG || token.id == TOK_STRING )
2019  {
2020  /*
2021  If we read a Long or a String, it'll be followed by a colon.
2022  If it was a label, the colon is gone.
2023  */
2024  getToken( ctx, token );
2025 
2026  res = eatToken( ctx, RSV_COLON );
2027  if ( res < 0 )
2028  return res;
2029  }
2030 
2031  anycases = true;
2032  onlydefault = false;
2033  if ( token.id == TOK_LONG )
2034  {
2035  unsigned short offset = static_cast<unsigned short>( prog_tokens->next() );
2036  unsigned char* tmppch = reinterpret_cast<unsigned char*>( &offset );
2037  caseblock.push_back( tmppch[0] );
2038  caseblock.push_back( tmppch[1] );
2039  caseblock.push_back( CASE_TYPE_LONG ); // FIXME hardcoded
2040  tmppch = reinterpret_cast<unsigned char*>( &token.lval );
2041  caseblock.push_back( tmppch[0] );
2042  caseblock.push_back( tmppch[1] );
2043  caseblock.push_back( tmppch[2] );
2044  caseblock.push_back( tmppch[3] );
2045  }
2046  else if ( token.id == CTRL_LABEL || token.id == TOK_STRING )
2047  {
2048  if ( strlen( token.tokval() ) >= 254 )
2049  {
2050  INFO_PRINT << "String expressions in CASE statements must be <= 253 characters.\n";
2051  return -1;
2052  ;
2053  }
2054  unsigned short offset = static_cast<unsigned short>( prog_tokens->next() );
2055  unsigned char* tmppch = reinterpret_cast<unsigned char*>( &offset );
2056  caseblock.push_back( tmppch[0] );
2057  caseblock.push_back( tmppch[1] );
2058  caseblock.push_back( static_cast<unsigned char>( strlen( token.tokval() ) ) );
2059  const char* str = token.tokval();
2060  size_t len = strlen( str );
2061 
2062  for ( size_t i = 0; i < len; ++i )
2063  caseblock.push_back( str[i] );
2064  }
2065  }
2066  else if ( token.id == RSV_ENDSWITCH && anycases ) // only accept if OPTIONs exist!
2067  {
2068  getToken( ctx, token );
2069  done = true;
2070  break;
2071  }
2072  else
2073  { // something else. we'll assume a statement.
2074  break;
2075  }
2076  }
2077  if ( done )
2078  break;
2079 
2080 
2081  // we've grabbed the OPTIONs. Now grab the code, until we get to an OPTION or ENDCASE
2083  while ( ctx.s[0] )
2084  {
2085  Token token;
2086  res = peekToken( ctx, token );
2087  if ( res < 0 )
2088  return res;
2089  if ( token.id == RSV_ENDSWITCH )
2090  {
2091  // Only accept ENDCASE if OPTIONs exist!
2092  if ( anycases )
2093  {
2094  getToken( ctx, token );
2095  done = true;
2096  break;
2097  }
2098  }
2099  else if ( token.id == TOK_LONG )
2100  {
2101  break;
2102  }
2103  else if ( token.id == CTRL_LABEL )
2104  {
2105  if ( stricmp( token.tokval(), "default" ) == 0 )
2106  break;
2107 
2108  substitute_constant( &token );
2109  if ( token.id != CTRL_LABEL )
2110  break;
2111  }
2112  else if ( token.id == TOK_STRING )
2113  {
2114  break;
2115  }
2116 
2117  // we're about to grab code. there needs to have been at least one OPTION, then.
2118  if ( !anycases )
2119  {
2120  INFO_PRINT << "CASE statement with no options!\n"
2121  << "Found '" << token.tokval() << "'"
2122  << ( token.id == CTRL_LABEL ? " but no such constant is defined.\n"
2123  : " prematurely.\n" );
2124  return -1;
2125  }
2126 
2127  res = getStatement( ctx, level );
2128  if ( res < 0 )
2129  return res;
2130  }
2131  // NOTE, the "jump to -> jump to" optimizer will be helpful here, optimizing breaks.
2132 
2133  emit_leaveblock();
2134  patchblock_breaks( prog_tokens->next() );
2135 
2136  localscope.popblock();
2137  program->leaveblock();
2138 
2139  if ( !done )
2140  {
2141  unsigned jmpend_posn;
2142  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &jmpend_posn );
2143  jmpend.push_back( jmpend_posn );
2144  }
2145  }
2146 
2147  // patchblock_breaks( program->tokens.next() );
2148  // program->leaveblock();
2149 
2150  // if only a 'default' block was defined, print a warning
2151  if ( onlydefault && ( compilercfg.DisplayWarnings || compilercfg.ErrorOnWarning ) )
2152  {
2153  INFO_PRINT << "Warning: CASE block only has a DEFAULT clause defined.\n"
2154  << "near: " << curLine << "\n";
2156  throw std::runtime_error( "Warnings treated as errors." );
2157  else
2158  INFO_PRINT << ctx;
2159  }
2160 
2161  // if no default specified, pretend 'default' was specified at the end.
2162  if ( default_posn == 0 )
2163  default_posn = prog_tokens->next();
2164 
2165  // the default case must go at the end.
2166  // if (1)
2167  {
2168  unsigned char* tmppch = reinterpret_cast<unsigned char*>( &default_posn );
2169  caseblock.push_back( tmppch[0] );
2170  caseblock.push_back( tmppch[1] );
2171  caseblock.push_back( CASE_TYPE_DEFAULT );
2172  }
2173 
2174  while ( !jmpend.empty() )
2175  {
2176  unsigned posn = jmpend.back();
2177  jmpend.pop_back();
2178  patchoffset( posn, prog_tokens->next() );
2179  }
2180 
2181  // now, we have to emit the casecmp block.
2182  unsigned caseblock_posn;
2183  auto casecmp_raw = new unsigned char[caseblock.size()];
2184  for ( size_t i = 0; i < caseblock.size(); ++i )
2185  casecmp_raw[i] = caseblock[i];
2186  program->symbols.append( casecmp_raw, static_cast<unsigned int>( caseblock.size() ),
2187  caseblock_posn );
2188  delete[] casecmp_raw;
2189  patchoffset( casecmp_posn, caseblock_posn );
2190  return 0;
2191 }
2192 
2194 {
2195  Token itrvar;
2196 
2197  int res;
2198  CompilerContext foreach_ctx( ctx );
2199 
2200  res = getToken( ctx, itrvar );
2201  if ( res < 0 )
2202  return res;
2203  if ( itrvar.id != TOK_IDENT )
2204  {
2205  INFO_PRINT << "FOREACH iterator must be an identifier, got " << itrvar << "\n";
2206  return res;
2207  }
2208 
2209  res = eatToken( ctx, TOK_IN );
2210  if ( res < 0 )
2211  return res;
2212 
2213  /*
2214  The outer block is a hidden block. It can't be labelled, but the
2215  inner one can. This way, 'break' and 'continue' won't touch
2216  the iterator variable, expression, and counter that we have in
2217  this hidden block.
2218  */
2219 
2220  res = getSimpleExpr( ctx );
2221  if ( res < 0 )
2222  return res;
2223  /*
2224  When these are evaluated, the value stack should look like this:
2225  (result of EXPR)
2226  */
2228 
2229  unsigned initforeach_posn;
2230  program->append( StoredToken( Mod_Basic, INS_INITFOREACH, TYP_RESERVED, 0 ), &initforeach_posn );
2231  /*
2232  INITFOREACH creates three local variables, placeholders for the iterator,
2233  expression, and counter. Only the iterator can be accessed, for now.
2234  */
2235  program->addlocalvar( itrvar.tokval() );
2236  localscope.addvar( itrvar.tokval(), foreach_ctx );
2237  program->addlocalvar( "_" + std::string( itrvar.tokval() ) + "_expr" );
2238  localscope.addvar( "_" + std::string( itrvar.tokval() ) + "_expr", foreach_ctx, false );
2239  program->addlocalvar( "_" + std::string( itrvar.tokval() ) + "_counter" );
2240  localscope.addvar( "_" + std::string( itrvar.tokval() ) + "_iter", foreach_ctx, false );
2241 
2242 
2243  unsigned iter_posn = program->tokens.next();
2245  Token endforeach_token;
2246  res = readblock( ctx, level, RSV_ENDFOREACH, NULL, &endforeach_token );
2247  if ( res < 0 )
2248  return res;
2249 
2250  emit_leaveblock();
2251 
2252  unsigned stepforeach_posn;
2253  program->update_dbg_pos( endforeach_token );
2254  program->append( StoredToken( Mod_Basic, INS_STEPFOREACH, TYP_RESERVED, iter_posn ),
2255  &stepforeach_posn );
2256  patchoffset( initforeach_posn, stepforeach_posn );
2257 
2258  patchblock_continues( stepforeach_posn );
2259  patchblock_breaks( program->tokens.next() );
2260  localscope.popblock();
2261  program->leaveblock();
2262 
2263  // FIXME this isn't right - continue needs to refer to one block, while break
2264  // needs to refer to another!
2265  // ie continue should use the inner block, while break should use the outer.
2266  leaveblock( 0, 0 );
2267  return 0;
2268 }
2269 
2270 
2272 {
2273  Token token;
2274  int res = peekToken( ctx, token );
2275  if ( res )
2276  return res;
2277  if ( token.id == TOK_SEMICOLON )
2278  {
2279  getToken( ctx, token );
2280  unsigned posn = 0;
2281  program->symbols.append( "", posn );
2282  program->append( StoredToken( Mod_Basic, TOK_STRING, TYP_OPERAND, posn ) );
2283  }
2284  else
2285  {
2287  if ( res < 0 )
2288  return res;
2289  }
2290 
2291  if ( inFunction )
2292  {
2293  program->append( StoredToken( Mod_Basic, RSV_RETURN, TYP_RESERVED, 0 ), 0 );
2294  }
2295  else
2296  {
2297  program->append( StoredToken( Mod_Basic, CTRL_PROGEND, TYP_CONTROL, 0 ), 0 );
2298  }
2299  return 0;
2300 }
2301 
2303 {
2304  Token token;
2305  getToken( ctx, token );
2306  if ( token.id != TOK_SEMICOLON )
2307  {
2308  INFO_PRINT << "Missing ';'\n";
2310  return -1;
2311  }
2312 
2313  program->append( StoredToken( Mod_Basic, RSV_EXIT, TYP_RESERVED, 0 ) );
2314  return 0;
2315 }
2316 
2318 {
2319  Token token;
2320 
2321  if ( !quiet )
2322  INFO_PRINT << "BEGIN block..\n";
2323 
2325 
2326  while ( ctx.s[0] )
2327  {
2328  peekToken( ctx, token );
2329  if ( token.id == RSV_ENDB )
2330  break;
2331 
2332  if ( getStatement( ctx, level ) == -1 )
2333  return -1;
2334  }
2335  if ( !ctx.s[0] )
2336  return -1;
2337 
2338  getToken( ctx, token );
2339  if ( token.id != RSV_ENDB )
2340  {
2341  return -1;
2342  }
2343 
2344 
2345  leaveblock( 0, 0 );
2346 
2347  return 0;
2348 }
2349 
2351 {
2352  Token token;
2353  Token funcName;
2354  int res;
2355 
2356  userfunc.ctx = ctx;
2357 
2358  res = getToken( ctx, funcName );
2359  if ( res )
2360  return res;
2361  bool first_time = true;
2362  if ( first_time )
2363  {
2364  if ( funcName.id != TOK_IDENT )
2365  {
2366  if ( funcName.id == TOK_FUNC )
2367  {
2368  INFO_PRINT << "'" << funcName.tokval() << "' is already defined as a function.\n"
2369  << "Near: " << curLine << "\n"
2370  << ctx;
2371  return -1;
2372  }
2373  else
2374  {
2375  INFO_PRINT << "Expected an identifier, got " << funcName << " instead.\n"
2376  << "Near: " << curLine << "\n"
2377  << ctx;
2378  return -1;
2379  }
2380  }
2381  }
2382  userfunc.name = funcName.tokval();
2383  Token lparen;
2384  res = getToken( ctx, lparen );
2385  if ( res )
2386  return res;
2387  if ( lparen.id != TOK_LPAREN )
2388  {
2389  return -1;
2390  }
2391  /*
2392  We have eaten the left paren. Next follows:
2393  RIGHT_PAREN, OR ( [refto] varname [:= default ] { COMMA or RIGHT_PAREN })
2394  */
2395  peekToken( ctx, token );
2396  for ( ;; )
2397  {
2398  Token paramName;
2399  res = getToken( ctx, token );
2400  if ( res )
2401  return -1;
2402 
2403  bool pass_by_reference = false;
2404  bool unused = false;
2405 
2406  if ( token.id == TOK_RPAREN )
2407  break;
2408  if ( token.id == TOK_REFTO )
2409  {
2410  pass_by_reference = true;
2411  res = getToken( ctx, token );
2412  if ( res )
2413  return -1;
2414  }
2415  if ( token.id == TOK_UNUSED )
2416  {
2417  unused = true;
2418  res = getToken( ctx, token );
2419  if ( res )
2420  return -1;
2421  }
2422  if ( token.id != TOK_IDENT )
2423  {
2424  return -1;
2425  }
2426  userfunc.parameters.resize( userfunc.parameters.size() + 1 );
2427  UserParam& param = userfunc.parameters.back();
2428  param.name = token.tokval();
2429  param.pass_by_reference = pass_by_reference;
2430  param.unused = unused;
2431  peekToken( ctx, token );
2432  if ( token.id == TOK_ASSIGN )
2433  {
2434  // We have a default argument.
2435  if ( unused )
2436  {
2437  INFO_PRINT << "Default arguments are not allowed in unused parameters\n";
2438  return -1;
2439  }
2440 
2441  param.have_default = 1;
2442  getToken( ctx, token ); // Eat the assignment operator
2443 
2444  Expression ex;
2446  1 )
2447  {
2448  INFO_PRINT << "Error reading expression in const declaration\n";
2449  return -1;
2450  }
2451 
2452  if ( ex.tokens.size() != 1 )
2453  {
2454  INFO_PRINT << "Const expression must be optimizable\n";
2455  return -1;
2456  }
2457 
2458  param.dflt_value = *( ex.tokens.back() );
2459  if ( param.dflt_value.type != TYP_OPERAND )
2460  {
2461  INFO_PRINT << "[" << funcName.tokval()
2462  << "]: Only simple operands are allowed as default arguments (" << token
2463  << " is not allowed)\n";
2464  return -1;
2465  }
2466  peekToken( ctx, token );
2467  }
2468  else if ( token.id == TOK_EQUAL1 )
2469  {
2470  INFO_PRINT << "Unexpected token: '" << token << "'. Did you mean := for assign?\n";
2471  return -1;
2472  }
2473  else
2474  {
2475  param.have_default = 0;
2476  }
2477 
2478  if ( token.id == TOK_COMMA )
2479  {
2480  getToken( ctx, token );
2481  continue;
2482  }
2483  else if ( token.id == TOK_RPAREN )
2484  {
2485  continue;
2486  }
2487  else
2488  return -1;
2489  }
2490  return 0;
2491 }
2492 
2494 {
2495  int res;
2496  Token token;
2497  res = getToken( ctx, token );
2498  if ( res )
2499  return res;
2500  if ( token.id != RSV_FUNCTION )
2501  {
2502  return -1;
2503  }
2504  Token funcName;
2505  res = getToken( ctx, funcName );
2506  if ( res )
2507  return res;
2508  if ( funcName.id != TOK_IDENT )
2509  {
2510  throw std::runtime_error( "Tried to declare a non-identifier" );
2511  }
2512  Token lparen;
2513  res = getToken( ctx, lparen );
2514  if ( res )
2515  return res;
2516  if ( lparen.id != TOK_LPAREN )
2517  {
2518  return -1;
2519  }
2520  int nParams = 0;
2521  for ( ;; )
2522  {
2523  res = getToken( ctx, token );
2524  if ( res )
2525  return -1;
2526  if ( token.id == TOK_RPAREN )
2527  {
2528  break;
2529  }
2530  if ( token.id != TOK_IDENT )
2531  {
2532  return -1;
2533  }
2534  nParams++;
2535  peekToken( ctx, token );
2536  if ( token.id == TOK_COMMA )
2537  {
2538  getToken( ctx, token );
2539  continue;
2540  }
2541  else if ( token.id == TOK_RPAREN )
2542  {
2543  continue;
2544  }
2545  else
2546  {
2547  return -1;
2548  }
2549  }
2550  getToken( ctx, token );
2551  if ( token.id != TOK_SEMICOLON )
2552  {
2554  return -1;
2555  }
2556  INFO_PRINT << "func decl: " << curLine << "\n"
2557  << "nParams: " << nParams << "\n";
2558  // addUserFunc(funcName.tokval(), nParams);
2559  return 0;
2560 }
2561 
2562 int Compiler::handleIf( CompilerContext& ctx, int level )
2563 {
2564  // unsigned if_begin;
2565  unsigned jump_false;
2566  Token token;
2567  // if_begin = program->tokens.next();
2568  jump_false = 0;
2569  if ( !quiet )
2570  INFO_PRINT << "if clause..\n";
2571 
2572 
2573  int res = getExprInParens( ctx ); // (expr) (parens required)
2574  if ( res < 0 )
2575  return res;
2576 
2577  unsigned if_token_posn;
2578  program->append( StoredToken( Mod_Basic, RSV_JMPIFFALSE, TYP_RESERVED, 0 ), &if_token_posn );
2579 
2580  // THEN is optional, currently.
2581  peekToken( ctx, token );
2582  if ( token.id == RSV_THEN )
2583  getToken( ctx, token ); // 'then'
2584  if ( !quiet )
2585  INFO_PRINT << "then clause..\n";
2586 
2587  // get the part we do
2588  res = getStatement( ctx, level );
2589  if ( res < 0 )
2590  return res;
2591 
2592  // if an ELSE follows, grab the ELSE and following statement
2593  peekToken( ctx, token );
2594  StoredTokenContainer* prog_tokens = &program->tokens;
2595  if ( token.id == RSV_ELSE )
2596  {
2597  unsigned else_token_posn;
2598  // this GOTO makes execution skip the ELSE part if the IF was true
2599  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &else_token_posn );
2600  jump_false = prog_tokens->next();
2601  getToken( ctx, token ); // eat the else
2602  if ( !quiet )
2603  INFO_PRINT << "else clause..\n";
2604  getStatement( ctx, level );
2605  // now that we know where the ELSE part ends, patch in the address
2606  // which skips past it.
2607  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, prog_tokens->next() ),
2608  else_token_posn );
2609  }
2610  else
2611  {
2612  jump_false = prog_tokens->next();
2613  }
2614 
2615 
2616  // patch up orig. IF token to skip past if false
2617  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_JMPIFFALSE, TYP_RESERVED, jump_false ),
2618  if_token_posn );
2619  return 0;
2620 }
2621 
2622 bool mismatched_end( const Token& token, BTokenId correct )
2623 {
2624  if ( token.id == correct )
2625  {
2626  return false;
2627  }
2628  else if ( token.id == RSV_ENDFOREACH || token.id == RSV_ENDIF ||
2629  // token.id == RSV_ENDB ||
2630  token.id == RSV_DOWHILE || token.id == RSV_ENDWHILE || token.id == RSV_UNTIL ||
2631  token.id == RSV_ENDFOR || token.id == RSV_ENDFUNCTION || token.id == RSV_ENDSWITCH ||
2632  token.id == RSV_ENDPROGRAM || token.id == RSV_ENDENUM )
2633  {
2634  Token t( correct, TYP_RESERVED );
2635  INFO_PRINT << "Expected " << t << " before " << token << "\n";
2636  return true;
2637  }
2638  else
2639  {
2640  return false;
2641  }
2642 }
2643 
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 2643 to 2645), as are listing lines 2721 and 2807. The body uses
// `ctx` and `level`, which are presumably its parameters - confirm against the real source.
//
// Compiles an IF (expr) ... [ELSEIF (expr) ...]* [ELSE ...] ENDIF construct.
// For every IF/ELSEIF branch the condition is emitted followed by a conditional jump, then
// the branch statements, then (if another ELSEIF/ELSE follows) a placeholder GOTO.
// Branches whose condition is a single TOK_LONG constant are resolved at compile time by
// rolling the program back to a checkpoint.
// Returns 0 on success and a negative value on error.
2645 {
2646  CompilerContext save_ctx( ctx );
2647 
 // Positions of the placeholder GOTO tokens appended after branch bodies; they are
 // patched at the end of this function to the position following the whole construct.
2648  std::vector<unsigned> jumpend;
2649 
2650  Token token;
2651  if ( !quiet )
2652  INFO_PRINT << "if clause..\n";
2653 
 // Seed the loop condition below: the first pass handles the IF branch itself.
2654  token.id = RSV_ST_IF;
2655 
 // checkpt / jumpend_size describe the last state that is known to be kept; they are
 // advanced after every branch that is not discarded.
2656  EScriptProgramCheckpoint checkpt( *program );
2657  size_t jumpend_size = jumpend.size();
2658 
 // discard_rest becomes true once a branch with a constant non-zero condition was seen:
 // every later branch then starts with discard_this == true.
2659  bool discard_rest = false;
2660  // bool discarded_all = true;
 // Set when at least one kept branch has a non-constant condition.
2661  bool included_any_tests = false;
2662  unsigned last_if_token_posn = static_cast<unsigned>( -1 );
2663  unsigned if_token_posn = static_cast<unsigned>( -1 );
2664  StoredTokenContainer* prog_tokens = &program->tokens;
2665  while ( token.id == RSV_ST_IF || token.id == RSV_ELSEIF )
2666  {
 // Allows the condition and its jump to be removed again if the condition is constant.
2667  EScriptProgramCheckpoint checkpt_expr( *program );
2668  // dump(cout);
2669  program->setstatementbegin();
2670  Expression ex;
2671  int res = getExprInParens( ctx, &ex ); // (expr) (parens required)
2672  if ( res < 0 )
2673  return res;
2674  // dump(cout);
2675  bool patch_if_token = true;
2676  last_if_token_posn = if_token_posn;
 // When the expression ends with a logical NOT, the last stored token is overwritten
 // with JMPIFTRUE instead of appending a JMPIFFALSE after it.
 // NOTE(review): ex.tokens.back() is read without checking that ex.tokens is non-empty.
2677  if ( ex.tokens.back()->id == TOK_LOG_NOT )
2678  {
2679  if_token_posn = prog_tokens->count() - 1;
2680  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_JMPIFTRUE, TYP_RESERVED, 0 ),
2681  if_token_posn );
2682  }
2683  else
2684  {
2685  program->append( StoredToken( Mod_Basic, RSV_JMPIFFALSE, TYP_RESERVED, 0 ), &if_token_posn );
2686  }
2687 
2688  bool discard_this = discard_rest;
2689 
 // Constant condition: a non-zero constant keeps this branch and discards all later
 // ones; a zero constant discards only this branch.
2690  if ( !discard_rest && ex.tokens.size() == 1 && ex.tokens[0]->id == TOK_LONG )
2691  {
2692  if ( ex.tokens[0]->lval )
2693  {
2694  discard_rest = true;
2695  }
2696  else
2697  {
2698  discard_this = true;
2699  }
2700  rollback( *program, checkpt_expr ); // don't need the expression or the jump,
2701  // even if we're keeping the block
2702  patch_if_token = false;
2703  }
2704  else
2705  {
2706  // discarded_all = discard_rest;
2707  if ( !discard_rest )
2708  included_any_tests = true;
2709  }
2710 
2711 
2712  // THEN is optional, currently.
2713  peekToken( ctx, token );
2714  if ( token.id == RSV_THEN )
2715  getToken( ctx, token ); // 'then'
2716  if ( !quiet )
2717  INFO_PRINT << "then clause..\n";
2718 
2719  // dump(cout);
2720  // get the part we do
 // NOTE(review): listing line 2721 is missing here; presumably it opens the block that
 // leaveblock( 0, 0 ) below closes - confirm.
 // Read statements until ELSEIF, ELSE, ENDIF or end of input.
2722  while ( ctx.s[0] )
2723  {
2724  peekToken( ctx, token );
2725  if ( token.id == RSV_ELSEIF || token.id == RSV_ELSE || token.id == RSV_ENDIF )
2726  {
2727  break;
2728  }
2729 
2730  res = getStatement( ctx, level );
2731  if ( res < 0 )
2732  {
2733  INFO_PRINT << "Error in IF statement starting at " << save_ctx;
2734 
2735  return res;
2736  }
2737  }
2738  leaveblock( 0, 0 );
 // End of input reached before ELSEIF / ELSE / ENDIF.
2739  if ( !ctx.s[0] )
2740  return -1;
2741 
2742  // dump(cout);
 // A kept branch moves the "known good" checkpoint forward.
2743  if ( !discard_this )
2744  {
2745  checkpt.commit( *program );
2746  jumpend_size = jumpend.size();
2747  }
2748  // dump(cout);
2749  // dump(cout);
2750 
2751  // this will be committed only when the next ELSEIF or ELSE is committed
2752  if ( token.id == RSV_ELSEIF || token.id == RSV_ELSE )
2753  {
2754  unsigned temp_posn;
2755  program->update_dbg_pos( token );
2756  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &temp_posn );
2757  jumpend.push_back( temp_posn );
2758 
 // Consume the ELSEIF so that the outer loop processes its condition next; an ELSE is
 // left in the stream and handled after the loop.
2759  if ( token.id == RSV_ELSEIF )
2760  {
2761  readCurLine( ctx );
2762  savesourceline();
2763  getToken( ctx, token );
2764  }
2765  }
 // Point this branch's conditional jump at the token following the branch (and its
 // GOTO, if one was appended). The offset is narrowed to unsigned short.
2766  if ( patch_if_token )
2767  {
2768  StoredToken tkn;
2769  prog_tokens->atGet1( if_token_posn, tkn );
2770  tkn.offset = static_cast<unsigned short>( prog_tokens->next() );
2771  prog_tokens->atPut1( tkn, if_token_posn );
2772  }
2773 
2774  // dump(cout);
 // Drop everything emitted since the last kept state, including GOTO bookkeeping, and
 // re-target the previous branch's conditional jump if it is still inside the program.
2775  if ( discard_this )
2776  {
2777  rollback( *program, checkpt );
2778  while ( jumpend.size() > jumpend_size )
2779  jumpend.pop_back();
2780 
2781  if ( last_if_token_posn != static_cast<unsigned>( -1 ) &&
2782  last_if_token_posn < prog_tokens->count() )
2783  {
2784  StoredToken tkn;
2785  prog_tokens->atGet1( last_if_token_posn, tkn );
2786  tkn.offset = static_cast<unsigned short>( prog_tokens->next() );
2787  prog_tokens->atPut1( tkn, last_if_token_posn );
2788  }
2789  }
2790  // dump(cout);
2791  }
2792 
2793  peekToken( ctx, token );
2794  if ( token.id != RSV_ENDIF && token.id != RSV_ELSE )
2795  {
2796  INFO_PRINT << "Expected ELSE or ENDIF after IF statement starting at " << save_ctx
2797  << "Did not expect: " << token << "\n";
2798  return -1;
2799  }
2800 
2801  // if an ELSE follows, grab the ELSE and following statement
2802  if ( token.id == RSV_ELSE )
2803  {
2804  getToken( ctx, token ); // eat the else
2805  if ( !quiet )
2806  INFO_PRINT << "else clause..\n";
 // NOTE(review): listing line 2807 is missing here; presumably it opens the block that
 // leaveblock( 0, 0 ) below closes - confirm.
2808  while ( ctx.s[0] )
2809  {
2810  peekToken( ctx, token );
2811  if ( token.id == RSV_ENDIF )
2812  break;
 // Any other block terminator inside the ELSE part is reported as an error.
2813  if ( mismatched_end( token, RSV_ENDIF ) )
2814  return -1;
2815 
2816  int res = getStatement( ctx, level );
2817  if ( res < 0 )
2818  return res;
2819  }
2820  leaveblock( 0, 0 );
2821  }
2822  // eat the ENDIF
2823  if ( !ctx.s[0] )
2824  return -1;
2825  getToken( ctx, token );
2826  if ( token.id != RSV_ENDIF )
2827  return -1;
2828 
 // A constant-true branch was taken and no kept branch had a run-time test: whatever
 // was emitted after the last kept state (the ELSE part) is removed again.
2829  if ( discard_rest && !included_any_tests )
2830  {
2831  rollback( *program, checkpt );
2832  while ( jumpend.size() > jumpend_size )
2833  jumpend.pop_back();
2834  }
2835 
2836 
 // Patch every remaining end-of-branch GOTO to the token following the construct.
2837  while ( !jumpend.empty() )
2838  {
2839  unsigned pc = jumpend.back();
2840  jumpend.pop_back();
2841  // patch up orig. IF token to skip past if false
2842 
2843  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, prog_tokens->next() ),
2844  pc );
2845  }
2846 
2847  return 0;
2848 }
2849 
2850 
2851 int Compiler::readblock( CompilerContext& ctx, int level, BTokenId endtokenid,
2852  BTokenId* last_statement_id, Token* pBlockEndToken )
2853 {
2854  CompilerContext tctx( ctx );
2855  int res;
2856  while ( ctx.s[0] )
2857  {
2858  Token token;
2859  res = peekToken( ctx, token );
2860  if ( res < 0 )
2861  return res;
2862  if ( token.id == endtokenid )
2863  {
2864  if ( pBlockEndToken != NULL )
2865  getToken( ctx, *pBlockEndToken );
2866  else
2867  getToken( ctx, token ); // eat the end-token
2868  return 0;
2869  }
2870  if ( last_statement_id != NULL )
2871  *last_statement_id = token.id;
2872  res = getStatement( ctx, level );
2873  if ( res < 0 )
2874  return res;
2875  }
2876  INFO_PRINT << "Error in block beginning at " << tctx << "End-of-File detected, expected '"
2877  << Token( Mod_Basic, endtokenid, TYP_RESERVED ) << "'\n";
2878  return -1;
2879 }
2880 
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 2880 to 2882), as are listing lines 2892 and 2895. The body uses
// `ctx` and `level`, which are presumably its parameters - confirm against the real source.
//
// Compiles a WHILE (expr) ... ENDWHILE loop: the condition, a conditional jump out of the
// loop, the loop body, and a GOTO back to the condition.
// Returns 0 on success and a negative value on error.
2882 {
2883  if ( !quiet )
2884  INFO_PRINT << "while clause..\n";
2885  StoredTokenContainer* prog_tokens = &program->tokens;
 // Taken before the condition is emitted; used below as the jump-back target and as the
 // target of `continue`.
2886  unsigned conditional_expr_posn = prog_tokens->next();
2887  int res = getExprInParens( ctx ); // (expr) (parens required)
2888  if ( res < 0 )
2889  return res;
2890 
2891  unsigned test_expr_token_posn;
 // NOTE(review): listing line 2892 is missing; the fragment below is the tail of a call
 // that fills test_expr_token_posn, presumably appending the JMPIFFALSE patched further
 // down - confirm.
2893  &test_expr_token_posn );
2894 
 // NOTE(review): listing line 2895 is missing here.
2896 
2897  Token endblock_tkn;
2898  res = readblock( ctx, level, RSV_ENDWHILE, NULL, &endblock_tkn );
2899  if ( res < 0 )
2900  return res;
2901 
2902  program->update_dbg_pos( endblock_tkn );
2903  emit_leaveblock();
2904 
2905  // jump back to conditional expression
2906  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, conditional_expr_posn ), 0 );
2907 
2908  // Control should jump past the loop when the expr evaluates to false.
2909  unsigned exit_loop_posn = prog_tokens->next();
2910  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_JMPIFFALSE, TYP_RESERVED, exit_loop_posn ),
2911  test_expr_token_posn );
2912 
2913  // continues re-test the expression, which is at the top of the loop.
2914  patchblock_continues( conditional_expr_posn );
2915  // breaks exit the loop, which has been wholly emitted by now.
2916  patchblock_breaks( prog_tokens->count() );
2917 
2918  localscope.popblock();
2919  program->leaveblock();
2920 
2921  return 0;
2922 }
2923 
2924 /*
2925 allowed formats for declare:
2926 local a;
2927 local a array;
2928 local a := 5;
2929 local a := expr;
2930 local a, b;
2931 local a, b := expr, ...;
2932 */
// Compiles a variable declaration list (one or more names separated by commas and
// terminated by a semicolon), emitting one declaration token per variable.
// `save_id` is the id of the declaring keyword; RSV_GLOBAL is rejected outside global
// scope and RSV_LOCAL is rejected inside it.
// Returns 0 on success and -1 on error; throws std::runtime_error on the
// "Warnings treated as errors." paths.
// NOTE(review): listing lines 3000, 3005, 3026, 3029, 3034, 3041, 3044 and 3048 are
// missing from this view, so some conditions and statements below are incomplete.
2933 int Compiler::handleVarDeclare( CompilerContext& ctx, unsigned save_id )
2934 {
2935  CompilerContext savectx( ctx );
2936  Token tk_varname, tk_delim;
 // Never modified in the visible code: the loop below ends via `break` or `return`.
2937  int done = 0;
2938  if ( save_id == RSV_GLOBAL && !inGlobalScope() )
2939  {
2940  INFO_PRINT << "Globals can only be declared at global scope.\n";
2941  return -1;
2942  }
2943  if ( save_id == RSV_LOCAL && inGlobalScope() )
2944  {
2945  INFO_PRINT << "Locals can only be declared within a block or function.\n";
2946  return -1;
2947  }
2948 
2949  do
2950  {
2951  /*
2952  formats: varname followed by comma or semicolon
2953  varname followed by "array", then by comma/semicolon
2954  varname followed by ':=', then an initializer, then comma/semicolon.
2955  */
 // Position of this variable's name; attached to the declaration token appended below.
2956  CompilerContext thisctx( ctx );
2957 
2958  getToken( ctx, tk_varname );
2959  if ( tk_varname.id != TOK_IDENT )
2960  {
2961  INFO_PRINT << "Non-identifier declared as a variable: '" << tk_varname.tokval() << "'\n"
2962  << "Token: " << tk_varname << "\n";
2963  return -1;
2964  }
2965 
 // A variable may not reuse the name of a constant.
2966  if ( constants.find( tk_varname.tokval() ) != constants.end() )
2967  {
2968  INFO_PRINT << tk_varname.tokval() << " is already a defined constant.\n";
2969  return -1;
2970  }
2971 
2972  // Add this variable to the current block/scope
2973  unsigned varindex = 0;
2974 
2975  if ( inGlobalScope() )
2976  {
2977  unsigned idx;
2978  CompilerContext gctx;
 // A new global gets the next index in globals_ and is marked as used immediately;
 // redeclaring an existing global is an error.
2979  if ( !globalexists( tk_varname.tokval(), idx, &gctx ) )
2980  {
2981  Variable v;
2982  v.name = tk_varname.tokval();
2983  v.used = true;
2984  v.ctx = savectx;
2985 
2986  varindex = static_cast<unsigned>( globals_.size() );
2987  globals_.push_back( v );
2988  program->globalvarnames.push_back( tk_varname.tokval() );
2989  }
2990  else
2991  {
2992  INFO_PRINT << "Global Variable '" << tk_varname.tokval() << "' is already declared at "
2993  << gctx;
2994  return -1;
2995  }
2996  }
2997  else
2998  {
2999  unsigned idx;
 // NOTE(review): listing line 3000 (the start of this condition) is missing.
3001  globalexists( tk_varname.tokval(), idx ) )
3002  {
3003  INFO_PRINT << "Warning: Local variable '" << tk_varname.tokval()
3004  << "' hides Global variable of same name.\n";
 // NOTE(review): listing line 3005 (the condition guarding this throw) is missing.
3006  throw std::runtime_error( "Warnings treated as errors." );
3007  else
3008  INFO_PRINT << ctx;
3009  }
 // NOTE(review): this value is overwritten three lines below before it is read.
3010  varindex = localscope.numVariables();
3011  program->addlocalvar( tk_varname.tokval() );
3012 
3013  varindex = localscope.numVarsInBlock();
3014  localscope.addvar( tk_varname.tokval(), ctx );
3015  }
3016 
3017  // grab the comma, semicolon, := or array declare token
3018  getToken( ctx, tk_delim );
3019 
3020  // note save_id is RSV_LOCAL or RSV_GLOBAL
3021  program->append( StoredToken( Mod_Basic, save_id, TYP_RESERVED, varindex ), thisctx );
3022 
3023  if ( tk_delim.id == TOK_ARRAY )
3024  {
3025  // declaring an array.
 // NOTE(review): listing line 3026 (the condition opening this block) is missing.
3027  {
3028  INFO_PRINT << "Warning! Deprecated array-declaration syntax used.\n";
 // NOTE(review): listing line 3029 (the condition guarding this throw) is missing.
3030  throw std::runtime_error( "Warnings treated as errors." );
3031  else
3032  INFO_PRINT << ctx;
3033  }
 // NOTE(review): listing line 3034 is missing here.
3035 
3036  getToken( ctx, tk_delim );
3037  }
3038  else if ( tk_delim.id == TOK_ASSIGN )
3039  {
3040  int res;
 // NOTE(review): listing line 3041, which assigns `res`, is missing. Note that a
 // result of 0 also leaves the function here (res <= 0).
3042  if ( res <= 0 )
3043  return res;
 // NOTE(review): listing line 3044 is missing here.
3045 
3046  getToken( ctx, tk_delim );
3047  }
 // NOTE(review): listing line 3048 is missing here.
3049 
 // A comma continues with the next variable, a semicolon ends the declaration.
3050  if ( tk_delim.id == TOK_COMMA )
3051  {
3052  continue;
3053  }
3054  else if ( tk_delim.id == TOK_SEMICOLON )
3055  {
3056  break;
3057  }
3058  else if ( tk_delim.id == TOK_EQUAL1 )
3059  {
3060  INFO_PRINT << "Unexpected token: '" << tk_delim << "'. Did you mean := for assign?\n";
3061  return -1;
3062  }
3063  else
3064  {
3065  INFO_PRINT << "Unexpected token: " << tk_delim << "\n";
3066  return -1;
3067  }
3068  } while ( !done );
3069 
3070  return 0;
3071 
3072  // FIXME: Dead code since ages, left here because I have no idea if bug or feature...
3073  // // insert a consumer to eat the evaluated result from the expr.
3074  // program->append( StoredToken( Mod_Basic, TOK_CONSUMER, TYP_UNARY_OPERATOR, 0 ) );
3075  // return 0;
3076 }
3077 
3078 /*
3079 allowed formats for declaring const:
3080 const a := expr;
3081 */
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3081 to 3083). The body uses `ctx`, presumably its parameter.
//
// Parses `name := expr;` after the const keyword and records the value in `constants`.
// The expression must reduce to exactly one token. Returns 0 on success, -1 on error.
3083 {
3084  Token tk_varname, tk_assign;
3085  // int done = 0;
3086 
3087 
 // NOTE(review): the comment below lists the variable-declaration forms; the code in this
 // function only accepts an identifier followed by ':=' and an expression.
3088  /*
3089  formats: varname followed by comma or semicolon
3090  varname followed by "array", then by comma/semicolon
3091  varname followed by ':=', then an initializer, then comma/semicolon.
3092  */
3093  getToken( ctx, tk_varname );
3094  if ( tk_varname.id != TOK_IDENT )
3095  {
3096  INFO_PRINT << "Expected identifier after const declaration\n";
3097  return -1;
3098  }
3099 
 // Redefining an existing constant is an error.
3100  if ( constants.count( tk_varname.tokval() ) )
3101  {
3102  INFO_PRINT << "Constant " << tk_varname << " has already been defined.\n";
3103  return -1;
3104  }
3105 
3106  // grab the := token
3107  getToken( ctx, tk_assign );
3108  if ( tk_assign.id != TOK_ASSIGN )
3109  {
3110  INFO_PRINT << "Expected := after identifier in const declaration\n";
3111  return -1;
3112  }
3113 
3114  Expression ex;
3115  if ( readexpr( ex, ctx, EXPR_FLAG_SEMICOLON_TERM_ALLOWED ) != 1 )
3116  {
3117  INFO_PRINT << "Error reading expression in const declaration\n";
3118  return -1;
3119  }
3120 
 // Only expressions that ended up as a single token are accepted as constant values.
3121  if ( ex.tokens.size() != 1 )
3122  {
3123  INFO_PRINT << "Const expression must be optimizable\n";
3124  return -1;
3125  }
3126 
 // The map stores a copy of the token; the original is then deleted and removed from
 // the expression.
3127  constants.insert( Constants::value_type( tk_varname.tokval(), *ex.tokens.back() ) );
3128  delete ex.tokens.back();
3129  ex.tokens.pop_back();
3130 
3131  return 0;
3132 }
3133 
3134 /*
3135 allowed formats for declaring enum
3136 enum FOO
3137 name [:= expr] [, name [:= expr]]... endenum
3138 */
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3138 to 3140), as is listing line 3191. The body uses `ctx`,
// presumably its parameter.
//
// Parses an enum declaration: a tag identifier followed by entries up to RSV_ENDENUM.
// Every entry is stored in `constants`, either with an explicit `:= expr` value or with
// the running counter `next_counter`. Returns 0 on success, -1 on error.
3140 {
3141  Token tk_enum_tag;
3142 
3143  // First, grab the enum tag.
3144  // TODO: validate it isn't already used
3145 
3146  if ( getToken( ctx, tk_enum_tag ) < 0 )
3147  {
3148  INFO_PRINT << "Error reading enum tag\n";
3149  return -1;
3150  }
3151 
 // The tag is only checked to be an identifier; it is not used further in this function.
3152  if ( tk_enum_tag.id != TOK_IDENT )
3153  {
3154  INFO_PRINT << "Expected an enum tag after 'enum'\n";
3155  return -1;
3156  }
3157 
 // Value given to the next entry that has no explicit initializer.
3158  int next_counter = 0;
3159  for ( ;; )
3160  {
3161  Token tk_varname, tk_assign;
3162  // int done = 0;
3163  if ( getToken( ctx, tk_varname ) < 0 )
3164  {
3165  INFO_PRINT << "Error reading identifier in enum declaration\n";
3166  return -1;
3167  }
3168 
 // The only successful exit of the loop.
3169  if ( tk_varname.id == RSV_ENDENUM )
3170  return 0;
3171 
3172  if ( tk_varname.id != TOK_IDENT )
3173  {
3174  INFO_PRINT << "Expected identifier in enum statement, got " << tk_varname << "\n";
3175  return -1;
3176  }
3177 
3178  Token tmp;
3179  // now, the forms. This should be followed by a comma, an 'endenum', or a ':='
3180  if ( peekToken( ctx, tmp ) < 0 )
3181  {
3182  INFO_PRINT << "Error reading token in enum statement\n";
3183  return -1;
3184  }
3185  if ( tmp.id == TOK_ASSIGN )
3186  {
3187  Token _tmp;
3188  getToken( ctx, _tmp );
3189  Expression ex;
3190  // FIXME doesn't work if expression is right before enum
 // NOTE(review): listing line 3191 (the condition that reads the expression into `ex`)
 // is missing here.
3192  {
3193  INFO_PRINT << "Error reading expression in enum declaration\n";
3194  return -1;
3195  }
 // A zero result from peekToken is treated as "a token is available".
3196  if ( !peekToken( ctx, _tmp ) )
3197  { // might be a comma, or an endenum
3198  if ( _tmp.id == TOK_COMMA )
3199  getToken( ctx, _tmp );
3200  }
3201  if ( ex.tokens.size() != 1 )
3202  {
3203  INFO_PRINT << "Enum expression must be optimizable\n";
3204  return -1;
3205  }
3206  Token* tkn = ex.tokens.back();
 // An explicit integer value restarts the counter just after it; any other kind of
 // value simply advances the counter by one.
3207  if ( tkn->id == TOK_LONG )
3208  next_counter = tkn->lval + 1;
3209  else
3210  ++next_counter;
 // The map stores a copy; the expression's token is then deleted and removed.
3211  constants.insert( Constants::value_type( tk_varname.tokval(), *tkn ) );
3212  delete tkn;
3213  ex.tokens.pop_back();
3214  }
3215  else if ( tmp.id == TOK_COMMA )
3216  {
3217  getToken( ctx, tmp );
3218  Token tkn( TOK_LONG, TYP_OPERAND );
3219  tkn.lval = next_counter++;
3220  constants.insert( Constants::value_type( tk_varname.tokval(), tkn ) );
3221  }
3222  else if ( tmp.id == RSV_ENDENUM )
3223  {
3224  Token tkn( TOK_LONG, TYP_OPERAND );
3225  tkn.lval = next_counter++;
3226  constants.insert( Constants::value_type( tk_varname.tokval(), tkn ) );
3227  // we'll pick this one up next pass
3228  }
3229  else if ( tmp.id == TOK_EQUAL1 )
3230  {
3231  INFO_PRINT << "Unexpected token: '" << tmp << "'. Did you mean := for assign?\n";
3232  return -1;
3233  }
3234  else
3235  {
3236  INFO_PRINT << "Unexpected token " << tmp << " in enum statement\n";
3237  return -1;
3238  }
3239  }
3240 }
3241 
3242 
3243 int Compiler::useModule( const char* modulename )
3244 {
3245  for ( const auto& elem : program->modules )
3246  {
3247  if ( modulename == elem->modulename )
3248  return 0;
3249  }
3250 
3251  std::unique_ptr<FunctionalityModule> compmodl( new FunctionalityModule( modulename ) );
3252 
3253  std::string filename_part = modulename;
3254  filename_part += ".em";
3255 
3256  std::string filename_full = current_file_path + filename_part;
3257 
3258  if ( verbosity_level_ >= 10 )
3259  INFO_PRINT << "Searching for " << filename_full << "\n";
3260 
3261  if ( !Clib::FileExists( filename_full.c_str() ) )
3262  {
3263  std::string try_filename_full = compilercfg.ModuleDirectory + filename_part;
3264  if ( verbosity_level_ >= 10 )
3265  INFO_PRINT << "Searching for " << try_filename_full << "\n";
3266  if ( Clib::FileExists( try_filename_full.c_str() ) )
3267  {
3268  if ( verbosity_level_ >= 10 )
3269  INFO_PRINT << "Found " << try_filename_full << "\n";
3270  // cout << "Using " << try_filename << endl;
3271  filename_full = try_filename_full;
3272  }
3273  }
3274  else
3275  {
3276  if ( verbosity_level_ >= 10 )
3277  INFO_PRINT << "Found " << filename_full << "\n";
3278  }
3279 
3280  char* orig_mt;
3281  char* mt;
3282 
3283  if ( getFileContents( filename_full.c_str(), &orig_mt ) )
3284  {
3285  INFO_PRINT << "Unable to find module " << modulename << "\n"
3286  << "\t(Filename: " << filename_full << ")\n";
3287  return -1;
3288  }
3289 
3290  mt = orig_mt;
3291  CompilerContext mod_ctx( filename_full, program->add_dbg_filename( filename_full ), mt );
3292 
3293  std::string save = current_file_path;
3294  current_file_path = getpathof( filename_full );
3295 
3296  int res = -1;
3297  for ( ;; )
3298  {
3299  Token tk_dummy;
3300  res = peekToken( mod_ctx, tk_dummy );
3301  if ( res < 0 )
3302  {
3303  INFO_PRINT << "Error reading token in module " << modulename << "\n";
3304  free( orig_mt );
3305  break;
3306  }
3307  else if ( res == 1 )
3308  {
3309  addModule( compmodl.release() );
3310  free( orig_mt );
3311  res = 0;
3312  break;
3313  }
3314  if ( tk_dummy.id == RSV_CONST )
3315  {
3316  getToken( mod_ctx, tk_dummy );
3317  int _res = handleConstDeclare( mod_ctx );
3318  if ( _res < 0 )
3319  break;
3320  else
3321  continue;
3322  }
3323  std::unique_ptr<UserFunction> puserfunc( new UserFunction );
3324  if ( readFunctionDeclaration( mod_ctx, *puserfunc ) )
3325  {
3326  INFO_PRINT << "Error reading function declaration in module " << modulename << "\n";
3327  free( orig_mt );
3328  res = -1;
3329  break;
3330  }
3331 
3332  Token tk_semicolon;
3333  if ( getToken( mod_ctx, tk_semicolon ) )
3334  {
3335  INFO_PRINT << filename_full << ": Error in declaration for " << puserfunc->name << ":\n"
3336  << " Expected a semicolon, got end-of-file or error\n";
3337 
3338  free( orig_mt );
3339  res = -1;
3340  break;
3341  }
3342  if ( tk_semicolon.id != TOK_SEMICOLON )
3343  {
3344  INFO_PRINT << filename_full << ": Error in declaration for " << puserfunc->name << ":\n"
3345  << " Expected a semicolon, got '" << tk_semicolon << "'\n";
3346  free( orig_mt );
3347  res = -1;
3348  break;
3349  }
3350 
3351  UserFunction* uf = puserfunc.release();
3352  compmodl->addFunction( uf->name.c_str(), static_cast<int>( uf->parameters.size() ), uf );
3353  }
3354  current_file_path = save;
3355  return res;
3356 }
3357 
3358 /*
3359 Format:
3360 Use module;
3361 
3362 Consider: at the beginning of program parsing, peek at the first token.
3363 While it's a 'Use', handle it. This way, from looking at the top of a program, you
3364 can tell what modules it uses - a simple way for someone to gauge a script's
3365 capabilities / risks, from a security standpoint.
3366 */
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3366 to 3368). The body uses `ctx`, presumably its parameter.
//
// Parses `<module> ;` after the USE keyword, validates it and forwards the module name
// to useModule(). Returns the result of useModule(), or -1 on a syntax error.
3368 {
3369  Token tk_module_name, tk_semicolon;
3370 
3371  if ( getToken( ctx, tk_module_name ) )
3372  {
3373  INFO_PRINT << "Error in USE statement: USE should be followed by a module name.\n";
3374  return -1;
3375  }
3376  if ( tk_module_name.id != TOK_IDENT )
3377  {
3378  INFO_PRINT << "Error in USE statement: Expected identifier, got '" << tk_module_name << "'\n";
3379  return -1;
3380  }
3381 
3382  if ( getToken( ctx, tk_semicolon ) )
3383  {
3384  INFO_PRINT << "Error in USE statement (module " << tk_module_name << "): "
3385  << "Expected ';', got end-of-file or error\n";
3386  return -1;
3387  }
3388  if ( tk_semicolon.id != TOK_SEMICOLON )
3389  {
3390  INFO_PRINT << "Error in USE statement (module " << tk_module_name << "): "
3391  << "Expected ';', got '" << tk_semicolon << "'\n";
3392  return -1;
3393  }
3394 
 // Module names longer than 10 characters are rejected before any file lookup.
3395  if ( strlen( tk_module_name.tokval() ) > 10 )
3396  {
3397  INFO_PRINT << "Error in USE statement: Module names must be <= 10 characters\n"
3398  << "Module specified was: '" << tk_module_name << "'\n";
3399  return -1;
3400  }
3401 
3402  return useModule( tk_module_name.tokval() );
3403 }
3404 
3405 int Compiler::includeModule( const std::string& modulename )
3406 {
3407  // cout << "includeModule(" << modulename << "). includes(" << included.size() << "):";
3408  // for( INCLUDES::const_iterator citr = included.begin(); citr != included.end(); ++citr )
3409  // {
3410  // cout << " " << (*citr);
3411  // }
3412  // cout << endl;
3413 
3414  std::string filename_part = modulename;
3415  filename_part += ".inc";
3416 
3417  std::string filename_full = current_file_path + filename_part;
3418 
3419  if ( filename_part[0] == ':' )
3420  {
3421  const Plib::Package* pkg = NULL;
3422  std::string path;
3423  if ( Plib::pkgdef_split( filename_part, NULL, &pkg, &path ) )
3424  {
3425  if ( pkg != NULL )
3426  {
3427  filename_full = pkg->dir() + path;
3428  std::string try_filename_full = pkg->dir() + "include/" + path;
3429 
3430  if ( verbosity_level_ >= 10 )
3431  INFO_PRINT << "Searching for " << filename_full << "\n";
3432 
3433  if ( !Clib::FileExists( filename_full.c_str() ) )
3434  {
3435  if ( verbosity_level_ >= 10 )
3436  INFO_PRINT << "Searching for " << try_filename_full << "\n";
3437  if ( Clib::FileExists( try_filename_full.c_str() ) )
3438  {
3439  if ( verbosity_level_ >= 10 )
3440  INFO_PRINT << "Found " << try_filename_full << "\n";
3441 
3442  filename_full = try_filename_full;
3443  }
3444  }
3445  else
3446  {
3447  if ( verbosity_level_ >= 10 )
3448  INFO_PRINT << "Found " << filename_full << "\n";
3449 
3450  if ( Clib::FileExists( try_filename_full.c_str() ) )
3451  INFO_PRINT << "Warning: Found '" << filename_full.c_str() << "' and '"
3452  << try_filename_full.c_str() << "'! Will use first file!\n";
3453  }
3454  }
3455  else
3456  {
3457  filename_full = compilercfg.PolScriptRoot + path;
3458 
3459  if ( verbosity_level_ >= 10 )
3460  {
3461  INFO_PRINT << "Searching for " << filename_full << "\n";
3462  if ( Clib::FileExists( filename_full.c_str() ) )
3463  INFO_PRINT << "Found " << filename_full << "\n";
3464  }
3465  }
3466  }
3467  else
3468  {
3469  INFO_PRINT << "Unable to read include file '" << modulename << "'\n";
3470  return -1;
3471  }
3472  }
3473  else
3474  {
3475  if ( verbosity_level_ >= 10 )
3476  INFO_PRINT << "Searching for " << filename_full << "\n";
3477 
3478  if ( !Clib::FileExists( filename_full.c_str() ) )
3479  {
3480  std::string try_filename_full = compilercfg.IncludeDirectory + filename_part;
3481  if ( verbosity_level_ >= 10 )
3482  INFO_PRINT << "Searching for " << try_filename_full << "\n";
3483  if ( Clib::FileExists( try_filename_full.c_str() ) )
3484  {
3485  if ( verbosity_level_ >= 10 )
3486  INFO_PRINT << "Found " << try_filename_full << "\n";
3487 
3488  // cout << "Using " << try_filename << endl;
3489  filename_full = try_filename_full;
3490  }
3491  }
3492  else
3493  {
3494  if ( verbosity_level_ >= 10 )
3495  INFO_PRINT << "Found " << filename_full << "\n";
3496  }
3497  }
3498 
3499  std::string filename_check = Clib::FullPath( filename_full.c_str() );
3500  if ( included.count( filename_check ) )
3501  return 0;
3502  included.insert( filename_check );
3503 
3504  referencedPathnames.push_back( filename_full );
3505 
3506  char* orig_mt;
3507 
3508  if ( getFileContents( filename_full.c_str(), &orig_mt ) )
3509  {
3510  INFO_PRINT << "Unable to find module " << modulename << "\n"
3511  << "\t(Filename: " << filename_full << ")\n";
3512  return -1;
3513  }
3514 
3515  CompilerContext mod_ctx( filename_full, program->add_dbg_filename( filename_full ), orig_mt );
3516 
3517  std::string save = current_file_path;
3518  current_file_path = getpathof( filename_full );
3519 
3520  int res = compileContext( mod_ctx );
3521 
3522  current_file_path = save;
3523 
3524  free( orig_mt );
3525  if ( res < 0 )
3526  return res;
3527  else
3528  return 0;
3529 }
3530 
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3529 to 3532). The body uses `ctx`, presumably its parameter.
//
// Parses `<name> ;` after the INCLUDE keyword, where <name> may be an identifier or a
// string, and forwards it to includeModule(). Returns the result of includeModule(), or
// -1 on a syntax error.
3532 {
3533  Token tk_module_name, tk_semicolon;
3534 
3535  if ( getToken( ctx, tk_module_name ) )
3536  {
3537  INFO_PRINT << "Error in INCLUDE statement: INCLUDE should be followed by a module name.\n";
3538  return -1;
3539  }
 // NOTE(review): the message below mentions only an identifier although a string token
 // is accepted as well.
3540  if ( tk_module_name.id != TOK_IDENT && tk_module_name.id != TOK_STRING )
3541  {
3542  INFO_PRINT << "Error in INCLUDE statement: Expected identifier, got '" << tk_module_name
3543  << "'\n";
3544  return -1;
3545  }
3546 
3547  if ( getToken( ctx, tk_semicolon ) )
3548  {
3549  INFO_PRINT << "Error in INCLUDE statement (module " << tk_module_name << "): "
3550  << "Expected ';', got end-of-file or error\n";
3551  return -1;
3552  }
3553  if ( tk_semicolon.id != TOK_SEMICOLON )
3554  {
3555  INFO_PRINT << "Error in INCLUDE statement (module " << tk_module_name << "): "
3556  << "Expected ';', got '" << tk_semicolon << "'\n";
3557  return -1;
3558  }
3559 
3560  return includeModule( tk_module_name.tokval() );
3561 }
3562 
3563 
3564 int Compiler::insertBreak( const std::string& label )
3565 {
3566  // Now, we've eaten the break; or break label; and 'label' contains the label, if any.
3567  // first, we must find the block level this refers to.
3568  unsigned numVarsToKill = 0;
3569  for ( int i = static_cast<int>( localscope.blockdescs_.size() - 1 ); i >= 0; --i )
3570  {
3571  BlockDesc& bd = localscope.blockdescs_[i];
3572 
3573  numVarsToKill += bd.varcount;
3574 
3575  if ( bd.break_ok && ( label == "" || // we didn't pick, and this is closest
3576  label == bd.label ) ) // this is the one we picked
3577  {
3578  if ( numVarsToKill )
3579  { // local variables were declared in this scope. We need to kill 'em.
3580 
3581  program->append( StoredToken( Mod_Basic, CTRL_LEAVE_BLOCK, TYP_CONTROL, numVarsToKill ),
3582  0 );
3583  }
3584 
3585  unsigned goto_posn;
3586  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &goto_posn );
3587  bd.break_tokens.push_back( goto_posn );
3588  return 0;
3589  }
3590  }
3591  INFO_PRINT << "Couldn't find an appropriate break point";
3592  if ( label != "" )
3593  INFO_PRINT << " for label " << label;
3594  INFO_PRINT << ".\n";
3595 
3596  return -1;
3597 }
3598 
3599 
3600 // break statements come in the following forms:
3601 // break;
3602 // break label;
3603 // we'll emit a LEAVE_BLOCK token if necessary, and
3604 // a GOTO which will be patched in when the block
3605 // is completed.
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3605 to 3607). The body uses `ctx`, presumably its parameter.
//
// Parses the remainder of a break statement (`;` or `label ;`) and passes the label, or
// an empty string, to insertBreak(). Returns its result, or -1 on a syntax error.
3607 {
3608  Token tk;
3609  std::string label;
3610 
 // The token after the keyword must be a label identifier or the terminating semicolon.
3611  if ( getToken( ctx, tk ) || ( ( tk.id != TOK_IDENT ) && ( tk.id != TOK_SEMICOLON ) ) )
3612  {
3613  INFO_PRINT << "break statement: expected 'break;' or 'break label;'\n";
3614  return -1;
3615  }
3616 
3617  if ( tk.id == TOK_IDENT )
3618  {
3619  label = tk.tokval();
 // A label must be followed directly by the semicolon.
3620  if ( getToken( ctx, tk ) || tk.id != TOK_SEMICOLON )
3621  {
3622  INFO_PRINT << "break statement: expected 'break;' or 'break label;'\n";
3623  return -1;
3624  }
3625  }
3626  else
3627  {
3628  label = "";
3629  }
3630 
3631  return insertBreak( label );
3632 }
3633 
3634 // continue statements come in the following forms:
3635 // continue;
3636 // continue label;
3637 // we'll emit a LEAVE_BLOCK token if necessary, and
3638 // a GOTO which will point at the continuePC of the block.
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3638 to 3640). The body uses `ctx`, presumably its parameter.
//
// Parses the remainder of a continue statement (`;` or `label ;`), then appends a
// CTRL_LEAVE_BLOCK (if local variables have to be removed) and a placeholder GOTO whose
// position is recorded in the target block's continue_tokens.
// Returns 0 on success, -1 on a syntax error or when no suitable block is open.
3640 {
3641  Token tk;
3642  std::string label;
3643 
 // The token after the keyword must be a label identifier or the terminating semicolon.
3644  if ( getToken( ctx, tk ) || ( ( tk.id != TOK_IDENT ) && ( tk.id != TOK_SEMICOLON ) ) )
3645  {
3646  INFO_PRINT << "continue statement: expected 'continue;' or 'continue label;'\n";
3647  return -1;
3648  }
3649 
3650  if ( tk.id == TOK_IDENT )
3651  {
3652  label = tk.tokval();
3653  if ( getToken( ctx, tk ) || tk.id != TOK_SEMICOLON )
3654  {
3655  INFO_PRINT << "continue statement: expected 'continue;' or 'continue label;'\n";
3656  return -1;
3657  }
3658  }
3659  else
3660  {
3661  label = "";
3662  }
3663 
3664  // Now, we've eaten the continue; or continue label; and 'label' contains the label, if any.
3665  // first, we must find the block level this refers to.
 // Accumulates the variable counts of all blocks from the innermost one up to and
 // including the target block.
3666  unsigned numVarsToKill = 0;
3667  for ( int i = static_cast<int>( localscope.blockdescs_.size() - 1 ); i >= 0; --i )
3668  {
3669  BlockDesc& bd = localscope.blockdescs_[i];
3670  numVarsToKill += bd.varcount;
3671 
3672  if ( bd.continue_ok && ( label == "" || // we didn't pick, and this is closest
3673  label == bd.label ) ) // this is the one we picked
3674  {
3675  if ( numVarsToKill )
3676  { // local variables were declared in this scope. We need to kill 'em.
3677 
3678  program->append( StoredToken( Mod_Basic, CTRL_LEAVE_BLOCK, TYP_CONTROL, numVarsToKill ),
3679  0 );
3680  }
3681 
 // Appended with offset 0; the position is kept in the block's continue_tokens.
3682  unsigned goto_posn;
3683  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &goto_posn );
3684  bd.continue_tokens.push_back( goto_posn );
3685  return 0;
3686  }
3687  }
3688  INFO_PRINT << "Couldn't find an appropriate continue point";
3689  if ( label != "" )
3690  INFO_PRINT << " for label " << label;
3691  INFO_PRINT << ".\n";
3692 
3693  return -1;
3694 }
3695 
3696 
3697 // BASIC-style FOR loop:
3698 // FOR I := 1 to 5 DO
3699 // FOR I
3700 //
3701 // Emitted Code:
3702 // (starting expr value)
3703 // (ending expr value)
3704 // INITFOR
3705 // statement_part:
3706 // (code block)
3707 // NEXTFOR(statement_part)
3708 //
3709 //
// NOTE(review): the signature line of this function is missing from this listing (the
// listing jumps from 3709 to 3711), as are listing lines 3748 and 3766. The body uses
// `ctx`, presumably its parameter.
//
// Compiles the BASIC-style loop `FOR ident := expr TO expr ... ENDFOR`: both expressions,
// an INS_INITFOR token, the loop body, and an INS_NEXTFOR token that refers back to the
// start of the body. Returns 0 on success and a negative value on most errors (see the
// review note on the iterator check below).
3711 {
3712  CompilerContext for_ctx( ctx );
3713  int res;
3714 
3715  Token itrvar, tmptoken;
3716 
3717  res = getToken( ctx, itrvar );
3718  if ( res < 0 )
3719  return res;
3720  if ( itrvar.id != TOK_IDENT )
3721  {
3722  INFO_PRINT << "FOR iterator must be an identifier, got " << itrvar << "\n";
 // NOTE(review): `res` is known to be >= 0 here (negative values returned above), so
 // this error path does not return a negative value - probably meant to be -1.
3723  return res;
3724  }
3725 
 // The iterator may not reuse the name of an existing local variable.
3726  if ( localscope.varexists( itrvar.tokval() ) )
3727  {
3728  INFO_PRINT << "FOR iterator '" << itrvar << "' hides a local variable.\n";
3729  return -1;
3730  }
3731 
3732  res = eatToken( ctx, TOK_ASSIGN );
3733  if ( res < 0 )
3734  return res;
3735 
 // Start value, terminated by the TO keyword.
3736  res = getExpr( ctx, EXPR_FLAG_TO_TERM_ALLOWED );
3737  if ( res < 0 )
3738  return res;
3739 
3740  res = eatToken( ctx, RSV_TO );
3741  if ( res < 0 )
3742  return res;
3743 
 // End value.
3744  res = getExpr2( ctx, EXPR_FLAG_AUTO_TERM_ALLOWED );
3745  if ( res < 0 )
3746  return res;
3747 
 // NOTE(review): listing line 3748 is missing here.
 // INITFOR is appended with offset 0 and patched at the end of this function.
3749  unsigned initfor_posn;
3750  program->append( StoredToken( Mod_Basic, INS_INITFOR, TYP_RESERVED, 0 ), &initfor_posn );
3751 
3752  /*
3753  INITFOR creates two local variables, placeholders for the iterator
3754  and end value. Only the iterator can be accessed, for now.
3755  */
3756  program->addlocalvar( itrvar.tokval() );
 // The iterator is added with addvar's default third argument at verbosity >= 5 and
 // with `false` otherwise; the hidden "_<name>_end" variable is always added with `false`.
3757  if ( verbosity_level_ >= 5 )
3758  localscope.addvar( itrvar.tokval(), for_ctx );
3759  else
3760  localscope.addvar( itrvar.tokval(), for_ctx, false );
3761  program->addlocalvar( "_" + std::string( itrvar.tokval() ) + "_end" );
3762  localscope.addvar( "_" + std::string( itrvar.tokval() ) + "_end", for_ctx, false );
3763 
3764  StoredTokenContainer* prog_tokens = &program->tokens;
 // Position of the first token of the loop body; NEXTFOR below refers to it.
3765  unsigned again_posn = prog_tokens->next();
 // NOTE(review): listing line 3766 is missing here.
3767  Token endblock_tkn;
 // Note that the body is read with a literal level of 1.
3768  res = readblock( ctx, 1, RSV_ENDFOR, NULL, &endblock_tkn );
3769  if ( res < 0 )
3770  return res;
3771 
3772  emit_leaveblock();
3773 
3774  program->update_dbg_pos( endblock_tkn );
3775  unsigned nextfor_posn;
3776  program->append( StoredToken( Mod_Basic, INS_NEXTFOR, TYP_RESERVED, again_posn ), &nextfor_posn );
3777 
 // `continue` goes to the NEXTFOR token, `break` to the token following it.
3778  patchblock_continues( nextfor_posn );
3779  patchblock_breaks( prog_tokens->next() );
3780  localscope.popblock();
3781  program->leaveblock();
3782 
3783  leaveblock( 0, 0 );
3784 
 // INITFOR receives the position following the whole loop.
3785  patchoffset( initfor_posn, prog_tokens->next() );
3786 
3787  return 0;
3788 }
3789 
// NOTE(review): the signature line (listing line 3790) is missing from this
// extract and the function name is not visible here; it takes a
// CompilerContext named ctx. Listing lines 3802, 3841 and 3858 are absent as
// well, so the statements shown here are not the complete body.
//
// Handles a C-style "for( initial; predicate; iterate )" whose body is a
// single statement read with getStatement(). The emitted layout is described
// in the block comment further down.
// Returns 0 on success, -1 on any error.
3791 {
3792  {
3793  Token tkn;
  // The result of getToken() is not checked here; only the token id is.
3794  getToken( ctx, tkn );
3795  if ( tkn.id != TOK_LPAREN )
3796  {
3797  INFO_PRINT << "FOR: expected '('\n";
3798  return -1;
3799  }
3800  }
3801 
3803 
  // All three clauses are read up front and injected later, in emission order.
3804  Expression initial_expr;
3805  Expression predicate_expr;
3806  Expression iterate_expr;
3807  if ( readexpr( initial_expr, ctx, EXPR_FLAG_SEMICOLON_TERM_ALLOWED ) != 1 )
3808  return -1;
3809  if ( readexpr( predicate_expr, ctx, EXPR_FLAG_SEMICOLON_TERM_ALLOWED ) != 1 )
3810  return -1;
3811  if ( readexpr( iterate_expr, ctx, EXPR_FLAG_RIGHTPAREN_TERM_ALLOWED ) != 1 )
3812  return -1;
3813 
3814  {
3815  Token tkn;
3816  if ( getToken( ctx, tkn ) || tkn.id != TOK_RPAREN )
3817  {
  // NOTE(review): this branch fires when the closing ')' is missing, but the
  // message says '(' -- the text looks like a copy/paste slip.
3818  INFO_PRINT << "FOR: expected '('\n";
3819  return -1;
3820  }
3821  }
3822 
3823  /*
3824  The c-style 'for' statement gets generated as follows:
3825  given:
3826  for( initial; predicate; iterate ) statement;
3827  initial;
3828  again:
3829  predicate;
3830  if true goto statement_part;
3831  break;
3832  statement_part:
3833  statement;
3834  iterate_part:
3835  iterate;
3836  goto again;
3837  */
3838 
3839 
3840  inject( initial_expr );
3842  StoredTokenContainer* prog_tokens = &program->tokens;
  // againPC: where the predicate starts; the trailing GOTO jumps back here.
3843  unsigned againPC = prog_tokens->next();
3844  inject( predicate_expr );
3845 
3846  unsigned if_posn;
3847  program->append( StoredToken( Mod_Basic, RSV_JMPIFTRUE, TYP_RESERVED, 0 ), &if_posn );
3848 
  // Predicate false: fall into a break. Predicate true: JMPIFTRUE is patched
  // to land just past that break, i.e. on the statement part.
3849  insertBreak( "" );
3850  patchoffset( if_posn, prog_tokens->next() );
3851 
3852  int res = getStatement( ctx, 1 );
3853  if ( res < 0 )
3854  return -1;
  // nextPC: start of the iterate part; passed to leaveblock() together with
  // the position after the loop.
3855  unsigned nextPC = prog_tokens->next();
3856 
3857  inject( iterate_expr );
3859 
3860  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, againPC ), 0 );
3861 
3862  leaveblock( prog_tokens->next(), nextPC );
3863 
3864  return 0;
3865 }
// NOTE(review): the signature line (listing line 3866) is missing from this
// extract. Presumably this is Compiler::handleBracketedFor_c(
// CompilerContext& ctx ), which is called elsewhere in this file when FOR is
// followed by '(' -- TODO confirm. Listing lines 3878, 3917, 3925 and 3936
// are absent as well, so the statements shown here are not the complete body.
//
// Handles a C-style "for( initial; predicate; iterate )" whose body is a
// statement block terminated by ENDFOR. Unlike the single-statement variant,
// the predicate is tested with JMPIFFALSE, which is patched to the position
// after the loop.
// Returns 0 on success; -1 or readblock()'s negative result on error.
3867 {
3868  {
3869  Token tkn;
  // The result of getToken() is not checked here; only the token id is.
3870  getToken( ctx, tkn );
3871  if ( tkn.id != TOK_LPAREN )
3872  {
3873  INFO_PRINT << "FOR: expected '('\n";
3874  return -1;
3875  }
3876  }
3877 
3879 
3880  Expression initial_expr;
3881  Expression predicate_expr;
3882  Expression iterate_expr;
3883  if ( readexpr( initial_expr, ctx, EXPR_FLAG_SEMICOLON_TERM_ALLOWED ) != 1 )
3884  return -1;
3885  if ( readexpr( predicate_expr, ctx, EXPR_FLAG_SEMICOLON_TERM_ALLOWED ) != 1 )
3886  return -1;
3887  if ( readexpr( iterate_expr, ctx, EXPR_FLAG_RIGHTPAREN_TERM_ALLOWED ) != 1 )
3888  return -1;
3889 
3890  {
3891  Token tkn;
3892  if ( getToken( ctx, tkn ) || tkn.id != TOK_RPAREN )
3893  {
  // NOTE(review): this branch fires when the closing ')' is missing, but the
  // message says '(' -- the text looks like a copy/paste slip.
3894  INFO_PRINT << "FOR: expected '('\n";
3895  return -1;
3896  }
3897  }
3898 
  // NOTE(review): the layout comment below mentions "if true goto
  // statement_part; break;", but the code shown here emits a single
  // JMPIFFALSE to the loop exit instead.
3899  /*
3900  The c-style 'for' statement gets generated as follows:
3901  given:
3902  for( initial; predicate; iterate ) statement;
3903  initial;
3904  again:
3905  predicate;
3906  if true goto statement_part;
3907  break;
3908  statement_part:
3909  statement;
3910  iterate_part:
3911  iterate;
3912  goto again;
3913  */
3914 
3915 
3916  inject( initial_expr );
3918  StoredTokenContainer* prog_tokens = &program->tokens;
  // againPC: where the predicate starts; the trailing GOTO jumps back here.
3919  unsigned againPC = prog_tokens->next();
3920  inject( predicate_expr );
3921 
3922  unsigned if_posn;
3923  program->append( StoredToken( Mod_Basic, RSV_JMPIFFALSE, TYP_RESERVED, 0 ), &if_posn );
3924 
3926  Token endblock_tkn;
3927  int res = readblock( ctx, 1, RSV_ENDFOR, NULL, &endblock_tkn );
3928  if ( res < 0 )
3929  return res;
3930  leaveblock( 0, 0 );
3931 
  // continuePC: start of the iterate part; passed to the final leaveblock().
3932  unsigned continuePC = prog_tokens->next();
3933 
3934  program->update_dbg_pos( endblock_tkn );
3935  inject( iterate_expr );
3937 
3938  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, againPC ), 0 );
  // The JMPIFFALSE offset becomes the position right after the GOTO.
3939  patchoffset( if_posn, prog_tokens->next() );
3940 
3941  leaveblock( prog_tokens->next(), continuePC );
3942 
3943  return 0;
3944 }
3945 
// NOTE(review): the signature line (listing line 3946) is missing from this
// extract; presumably Compiler::handleFor( CompilerContext& ctx ), the name
// used for RSV_FOR in the statement dispatcher -- TODO confirm.
//
// Chooses the FOR flavour by peeking at the next token: '(' selects the
// C-style handler, anything else the BASIC-style handler.
// Returns the chosen handler's result, or -1 if peekToken() reports non-zero.
3947 {
3948  Token tkn;
3949  int res;
3950  res = peekToken( ctx, tkn );
3951  if ( res )
3952  {
3953  INFO_PRINT << "Error in FOR statement\n";
3954  return -1;
3955  }
3956 
3957  if ( tkn.id == TOK_LPAREN )
3958  return handleBracketedFor_c( ctx );
3959  else
3960  return handleBracketedFor_basic( ctx );
3961 }
// NOTE(review): the signature line (listing line 3962) is missing from this
// extract; presumably Compiler::emitFileLine( CompilerContext& ctx ), which is
// called with a single ctx argument elsewhere in this file -- TODO confirm.
//
// Stores "<filename>, Line <n>" in program->fileline at the index equal to
// the current token count, growing the vector to count + 1 entries first.
3963 {
3964  int cnt = program->tokens.count();
3965  program->fileline.resize( cnt + 1 );
3966  program->fileline[cnt] =
3967  ctx.filename + ", Line " + Clib::decint( static_cast<unsigned int>( ctx.line ) );
3968 }
// NOTE(review): the signature line (listing line 3969) is missing from this
// extract and the function name is not visible here; it takes a
// CompilerContext named ctx.
//
// Calls emitFileLine( ctx ) when any of the following holds:
//  - fileline already has exactly (token count + 1) entries,
//  - the most recent fileline entry is empty (or there is none),
//  - the most recent entry does not begin with ctx.filename.
3970 {
3971  std::string last;
3972  if ( program->fileline.size() )
3973  last = program->fileline.back();
3974 
3975  if ( program->fileline.size() == program->tokens.count() + 1 || last.empty() ||
3976  last.substr( 0, ctx.filename.size() ) != ctx.filename )
3977  {
3978  emitFileLine( ctx );
3979  }
3980 }
// NOTE(review): the signature line (listing line 3981) is missing from this
// extract; presumably Compiler::savesourceline(), which is called without
// arguments elsewhere in this file -- TODO confirm.
//
// Stores curLine in program->sourcelines at the index equal to the current
// token count, growing the vector to count + 1 entries first.
3982 {
3983  int cnt = program->tokens.count();
3984  program->sourcelines.resize( cnt + 1 );
3985  program->sourcelines[cnt] = curLine;
3986 }
// NOTE(review): the signature line (listing line 3987) is missing from this
// extract; presumably Compiler::_getStatement( CompilerContext& ctx, int level ),
// which is how this routine is invoked elsewhere in this file -- TODO confirm.
// Listing lines 3999, 4021, 4029, 4034, 4128, 4153-4157, 4161, 4167 and 4178
// are absent as well, so several conditions and statements below are
// incomplete as shown.
//
// Compiles one statement:
//  - returns 1 when peekToken() reports 1 (nothing left to read),
//  - reserved words are dispatched to their handle*() routine,
//  - a label is remembered in latest_label if a loop/switch keyword follows,
//  - anything else is treated as an expression statement, with a warning if
//    its last emitted token is not in the list of effectful operations.
// Returns the handler's result, 0, 1, or a negative value on error.
3988 {
3989  int res;
3990  unsigned last_position = 0;
3991 
3992  readCurLine( ctx );
3993 
  // With debug info enabled, the source line and a DebugToken are appended to
  // the symbol table; last_position ends up as the DebugToken's offset there.
3994  if ( include_debug )
3995  {
3996  program->symbols.append( curLine, last_position );
3997 
3998  DebugToken DT;
4000  DT.offset = static_cast<unsigned int>( ctx.s - ctx.s_begin );
4001  DT.strOffset = last_position;
4002 
4003  program->symbols.append( &DT, sizeof DT, last_position );
4004  }
4005  StoredToken stok( Mod_Basic, CTRL_STATEMENTBEGIN, TYP_CONTROL, last_position );
4006 
4007 
4008  // reinit();
4009 
4010  Token token;
4011 
4012  if ( peekToken( ctx, token ) == 1 )
4013  {
4014  getToken( ctx, token ); // trailing whitespace can hurt.
4015  return 1; // all done!
4016  }
4017 
4018  if ( token.id != RSV_FUNCTION && token.id != RSV_PROGRAM )
4019  {
4020  savesourceline();
4022  }
4023 
  // The CTRL_STATEMENTBEGIN marker is only emitted in debug builds; otherwise
  // last_position is simply the current token count.
4024  if ( include_debug )
4025  program->append( stok, &last_position );
4026  else
4027  last_position = program->tokens.count();
4028 
4030  {
4031  INFO_PRINT << "Warning: Found deprecated token "
4032  << "'" << token.tokval() << "'"
4033  << " on line " << ctx.line << " of " << ctx.filename << "\n";
4035  throw std::runtime_error( "Warnings treated as errors." );
4036  // warning only; doesn't bail out.
4037  }
4038 
4039  if ( token.type == TYP_RESERVED )
4040  {
  // Consume the keyword that was only peeked so far, then dispatch on it.
4041  getToken( ctx, token );
4042  switch ( token.id )
4043  {
4044  case RSV_OPTION_BRACKETED:
4045  INFO_PRINT << "_OptionBracketed is obsolete.\n";
4046  // bracketed_if_ = true;
4047  return 0;
4048  case RSV_DECLARE:
4049  return handleDeclare( ctx );
4050 
4051  case RSV_EXPORTED:
4052  case RSV_FUNCTION:
  // NOTE(review): token.type (TYP_RESERVED in this branch) is passed, but
  // handleBracketedFunction2() compares that argument against RSV_EXPORTED,
  // a token id -- token.id looks intended. Confirm before changing.
4053  return handleBracketedFunction2( ctx, level, token.type );
4054 
4055  case RSV_USE_MODULE:
4056  return handleUse( ctx );
4057  case RSV_INCLUDE_FILE:
4058  return handleInclude( ctx );
4059 
4060  case RSV_PROGRAM:
4061  return handleProgram( ctx, level + 1 );
4062  case RSV_RETURN:
4063  return handleReturn( ctx );
4064  case RSV_EXIT:
4065  return handleExit( ctx );
4066 
4067  case RSV_ST_IF:
4068  return handleBracketedIf( ctx, level + 1 );
4069 
4070  case RSV_FOR:
4071  return handleFor( ctx );
4072  case RSV_FOREACH:
4073  return handleForEach( ctx, level + 1 );
4074  case RSV_SWITCH:
4075  return handleSwitch( ctx, level + 1 );
4076  case RSV_REPEAT:
4077  return handleRepeatUntil( ctx, level + 1 );
4078  case RSV_DO:
4079  return handleDoClause( ctx, level + 1 );
4080  case RSV_WHILE:
4081  return handleBracketedWhile( ctx, level + 1 );
4082 
4083  case RSV_BREAK:
4084  return handleBreak( ctx );
4085  case RSV_CONTINUE:
4086  return handleContinue( ctx );
4087 
4088  case RSV_VAR:
4089  return handleVarDeclare( ctx, inGlobalScope() ? RSV_GLOBAL : RSV_LOCAL );
4090  case RSV_CONST:
4091  return handleConstDeclare( ctx );
4092  case RSV_ENUM:
4093  return handleEnumDeclare( ctx );
4094 
4095  // DEPRECATED:
4096  // case RSV_BEGIN: return handleBlock(ctx, level+1);
4097 
4098  default:
4099  INFO_PRINT << "Unhandled reserved word: " << token << "\n";
4100  return -1;
4101  // assert(0);
4102  break;
4103  }
4104  }
4105  else if ( token.type == TYP_LABEL )
4106  {
4107  if ( !quiet )
4108  INFO_PRINT << "Label found! " << token << "\n";
4109  getToken( ctx, token );
4110 
  // A label is only accepted directly in front of one of the keywords
  // checked below.
4111  Token precedes;
4112  res = peekToken( ctx, precedes );
4113  if ( res != 0 ||
4114  ( precedes.id != RSV_SWITCH && precedes.id != RSV_FOREACH && precedes.id != RSV_REPEAT &&
4115  precedes.id != RSV_WHILE && precedes.id != RSV_DO && precedes.id != RSV_FOR ) )
4116  {
4117  INFO_PRINT
4118  << "Illegal location for label: " << token.tokval() << "\n"
4119  << "Labels can only come before DO, WHILE, FOR, FOREACH, REPEAT, and CASE statements.\n";
4120  return -1;
4121  }
4122  latest_label = token.tokval();
  // NOTE(review): token is a local and is not read again before the return,
  // so this store has no visible effect.
4123  token.lval = last_position;
4124  return 0;
4125  }
4126 
  // Expression statement. NOTE(review): the line that assigns res and exprlen
  // (listing line 4128) is not present in this extract.
4127  size_t exprlen;
4129 
4130  if ( res < 0 )
4131  return res;
4132  if ( exprlen != 0 )
4133  {
  // Inspect the last emitted token, looking through a trailing TOK_CONSUMER,
  // to judge whether the expression did anything useful.
4134  StoredToken tmptoken;
4135  StoredTokenContainer* prog_tokens = &program->tokens;
4136  prog_tokens->atGet1( prog_tokens->count() - 1, tmptoken );
4137  if ( tmptoken.id == TOK_CONSUMER )
4138  {
4139  prog_tokens->atGet1( prog_tokens->count() - 2, tmptoken );
4140  }
4141 
4142  if ( tmptoken.id == TOK_ASSIGN || tmptoken.id == TOK_PLUSEQUAL ||
4143  tmptoken.id == TOK_MINUSEQUAL || tmptoken.id == TOK_TIMESEQUAL ||
4144  tmptoken.id == TOK_DIVIDEEQUAL || tmptoken.id == TOK_MODULUSEQUAL ||
4145  tmptoken.id == INS_SUBSCRIPT_ASSIGN || tmptoken.id == INS_SUBSCRIPT_ASSIGN_CONSUME ||
4146  tmptoken.id == INS_MULTISUBSCRIPT_ASSIGN || tmptoken.id == INS_ASSIGN_CONSUME ||
4147  tmptoken.id == TOK_ADDMEMBER || tmptoken.id == TOK_DELMEMBER || tmptoken.id == TOK_FUNC ||
4148  tmptoken.id == INS_CALL_METHOD || tmptoken.id == TOK_USERFUNC ||
4149  tmptoken.id == CTRL_JSR_USERFUNC || tmptoken.id == INS_ASSIGN_LOCALVAR ||
4150  tmptoken.id == INS_ASSIGN_GLOBALVAR || tmptoken.id == INS_SET_MEMBER ||
4151  tmptoken.id == INS_SET_MEMBER_CONSUME || tmptoken.id == INS_SET_MEMBER_ID ||
4152  tmptoken.id == INS_SET_MEMBER_ID_CONSUME || tmptoken.id == INS_CALL_METHOD_ID ||
4158  {
4159  // ok! These operators actually accomplish something.
4160  }
4162  {
4163  if ( tmptoken.id == TOK_EQUAL1 )
4164  {
4165  INFO_PRINT << "Warning: Equals test result ignored. Did you mean := for assign?\n"
4166  << "near: " << curLine << "\n";
4168  throw std::runtime_error( "Warnings treated as errors." );
4169  else
4170  INFO_PRINT << ctx;
4171  }
4172  else
4173  {
4174  // warn code has no effect/value lost
4175  INFO_PRINT << "Warning: Result of operation may have no effect.\n"
4176  << "Token ID: " << tmptoken.id << "\n"
4177  << "near: " << curLine << "\n";
4179  throw std::runtime_error( "Warnings treated as errors." );
4180  else
4181  INFO_PRINT << ctx;
4182  }
4183  }
4184  // cout << "Statement: " << Parser.CA << endl;
4185  }
4186  return res;
4187 }
4188 
// NOTE(review): the signature line (listing line 4189) is missing from this
// extract; presumably Compiler::getStatement( CompilerContext& ctx, int level ),
// which is how this routine is invoked elsewhere in this file -- TODO confirm.
//
// Skips leading whitespace and comments, records the statement start on the
// program, then compiles one statement via _getStatement(). A negative result
// or a std::exception is reported together with the saved start context; an
// exception is converted into a -1 return instead of propagating.
4190 {
4191  ctx.skipws();
4192  ctx.skipcomments();
4193 
  // savectx keeps the position where the statement began, for error reports.
4194  CompilerContext savectx( ctx );
4195  program->setcontext( savectx );
4196  program->setstatementbegin();
4197  int res = 0;
4198  try
4199  {
4200  res = _getStatement( ctx, level );
4201  if ( res < 0 )
4202  {
4203  fmt::Writer _tmp;
4204  _tmp << "Error compiling statement at ";
4205  savectx.printOnShort( _tmp );
4206  INFO_PRINT << _tmp.str();
4207  }
4208  }
4209  catch ( std::exception& ex )
4210  {
4211  fmt::Writer _tmp;
4212  _tmp << "Error compiling statement at ";
4213  savectx.printOnShort( _tmp );
4214  _tmp << ex.what() << "\n";
4215  INFO_PRINT << _tmp.str();
4216  res = -1;
4217  }
4218  return res;
4219 }
4220 
// NOTE(review): the signature line (listing line 4221) is missing from this
// extract and the function name is not visible here; it takes a
// CompilerContext named ctx. Listing lines 4307, 4314 and 4332 are absent as
// well, so the statements shown here are not the complete body.
//
// Compiles a function whose body is a BEGIN .. END block: reads the
// declaration, emits a GOTO that skips over the function code, emits one
// token per parameter (last parameter first), compiles the block and, unless
// the last emitted token is RSV_RETURN, emits an empty-string operand (the
// token appended after it is on a missing listing line).
// Returns 0 on success, a non-zero getToken() result or a negative value on
// error.
//
// NOTE(review): inFunction is set to 1 early and only reset on the success
// path; every error return below leaves it set.
4222 {
4223  int res;
4224  Token token;
4225  Token funcName;
4226 
4227  if ( inFunction )
4228  {
  // NOTE(review): funcName has not been filled in at this point, so the
  // message prints a default-constructed Token.
4229  INFO_PRINT << "Can't declare a function inside another function.\n"
4230  << "(attempt to declare " << funcName << ")\n";
4231  return -1;
4232  }
4233  inFunction = 1;
4234 
4235  UserFunction userfunc;
4236 
4237  if ( readFunctionDeclaration( ctx, userfunc ) )
4238  {
4239  return -1;
4240  }
4241  userFunctions[userfunc.name] = userfunc;
4242  /*
4243  should be begin, then statements while peektoken != end, then eat end.
4244 
4245  getToken(s, token);
4246  if (token.type != TYP_DELIMITER || token.id != DELIM_SEMICOLON) {
4247  err = PERR_MISSINGDELIM;
4248  return -1;
4249  }
4250  */
4251  // woo-hoo! recursive calls should work.
4252  // cout << "func decl: " << curLine << endl;
4253  unsigned posn = 0;
4254 
4255  unsigned skip_goto_posn;
4256 
4257  StoredTokenContainer* prog_tokens = &program->tokens;
4258  if ( include_debug )
4259  {
4260  /*
4261  A bit of explanation:
4262  We want to end up with this:
4263  GOTO (skip)
4264  STATEMENT BEGIN (function declaration) <-- user func address
4265  User Function (Foo)
4266  But right now we have:
4267  STATEMENT_BEGIN (line info)
4268  and adding the goto and function would end up adding:
4269  GOTO (skip)
4270  (User Function (Foo) will soon go here) <-- user func address
4271  So, we append a copy of the statement_begin, then put a goto
4272  in its original position.
4273  */
4274 
4275  // STATEMENT
4276  StoredToken tmptoken;
4277  prog_tokens->atGet1( prog_tokens->count() - 1, tmptoken );
4278  program->append( tmptoken, &userfunc.position ); // STATEMENT, STATEMENT
4279 
4280  // skip_goto_posn: a goto is inserted, so prog ctrl will skip over this function
4281  skip_goto_posn = prog_tokens->count() - 2;
4282  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), skip_goto_posn );
4283  /* GOTO, STATEMENT */
4284 
4285  program->symbols.append( userfunc.name.c_str(), posn );
4286  program->append( StoredToken( Mod_Basic, TOK_USERFUNC, TYP_USERFUNC, posn ) );
4287  }
4288  else
4289  {
  // Without debug info there is no statement marker to move: append the
  // skip GOTO and take the next position as the function's address.
4290  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &skip_goto_posn );
4291  userfunc.position = posn = prog_tokens->count();
4292  }
4293 
4294  res = getToken( ctx, token );
4295  if ( res )
4296  return res;
4297  if ( token.id != RSV_BEGIN )
4298  {
4299  INFO_PRINT << "Error reading function definition for " << userfunc.name << "()\n"
4300  << "Expected BEGIN .. END block, got token: '" << token << "'\n";
4301 
4302  // cout << "Functions must contain a BEGIN .. END block." << endl;
4303  return -1;
4304  }
4305 
4306  program->enterfunction();
4308 
  // Parameters are walked from last to first; each one gets a symbol, an
  // emitted token and a local variable slot.
4309  for ( int i = static_cast<unsigned int>( userfunc.parameters.size() - 1 ); i >= 0; --i )
4310  {
4311  UserParam* user_param = &userfunc.parameters[i];
4312  program->symbols.append( user_param->name.c_str(), posn );
4313  program->append(
4315  TYP_OPERATOR, posn ),
4316  0 );
4317  program->addlocalvar( user_param->name );
4318 
4319  localscope.addvar( user_param->name, ctx, true, user_param->unused );
4320  }
4321 
4322  res = handleBlock( ctx, 1 /* level */ );
4323  if ( res < 0 )
4324  return res;
4325 
  // If the body did not end on a RETURN token, an empty-string operand is
  // emitted here (the token following it is on a missing listing line).
4326  StoredToken tmp;
4327  prog_tokens->atGet1( prog_tokens->count() - 1, tmp );
4328  if ( tmp.id != RSV_RETURN )
4329  {
4330  program->symbols.append( "", posn );
4331  program->append( StoredToken( Mod_Basic, TOK_STRING, TYP_OPERAND, posn ) );
4333  }
4334 
4335  leaveblock( 0, 0 );
4336  program->leavefunction();
4337 
4338  /* now, the skip goto must be patched up with the correct PC address*/
4339  patchoffset( skip_goto_posn, prog_tokens->next() );
4340 
4341 
4342  inFunction = 0;
4343  return 0;
4344 }
4345 
// NOTE(review): the signature line (listing line 4346) is missing from this
// extract and the function name is not visible here; it takes a
// CompilerContext named ctx. Listing lines 4418, 4425 and 4449 are absent as
// well, so the statements shown here are not the complete body.
//
// Compiles a function whose body runs up to ENDFUNCTION: reads the
// declaration, emits a GOTO that skips over the function code, emits one
// token per parameter (last parameter first), reads the body with
// readblock() and, unless the last emitted token is RSV_RETURN, emits an
// empty-string operand (the token appended after it is on a missing listing
// line).
// Returns 0 on success, -1 or readblock()'s negative result on error.
//
// NOTE(review): inFunction is set to 1 early and only reset on the success
// path; every error return below leaves it set.
4347 {
  // save_ctx keeps the declaration position for the error message below.
4348  CompilerContext save_ctx( ctx );
4349  int res;
4350  Token token;
4351 
4352  if ( inFunction )
4353  {
4354  INFO_PRINT << "Can't declare a function inside another function.\n";
4355  return -1;
4356  }
4357  inFunction = 1;
4358 
4359  UserFunction userfunc;
4360 
4361  if ( readFunctionDeclaration( ctx, userfunc ) )
4362  {
4363  return -1;
4364  }
4365  userFunctions[userfunc.name] = userfunc;
4366  /*
4367  should be begin, then statements while peektoken != end, then eat end.
4368 
4369  getToken(s, token);
4370  if (token.type != TYP_DELIMITER || token.id != DELIM_SEMICOLON) {
4371  err = PERR_MISSINGDELIM;
4372  return -1;
4373  }
4374  */
4375  /* woo-hoo! recursive calls should work. */
4376  // cout << "func decl: " << curLine << endl;
4377  unsigned posn = 0;
4378 
4379  unsigned skip_goto_posn;
4380 
4381  StoredTokenContainer* prog_tokens = &program->tokens;
4382  if ( include_debug )
4383  {
4384  /*
4385  A bit of explanation:
4386  We want to end up with this:
4387  GOTO (skip)
4388  STATEMENT BEGIN (function declaration) <-- user func address
4389  User Function (Foo)
4390  But right now we have:
4391  STATEMENT_BEGIN (line info)
4392  and adding the goto and function would end up adding:
4393  GOTO (skip)
4394  (User Function (Foo) will soon go here) <-- user func address
4395  So, we append a copy of the statement_begin, then put a goto
4396  in its original position.
4397  */
4398  /* STATEMENT */
4399  StoredToken tmptoken;
4400  prog_tokens->atGet1( prog_tokens->count() - 1, tmptoken );
4401  program->append( tmptoken, &userfunc.position ); /* STATEMENT, STATEMENT */
4402 
4403  /* skip_goto_posn: a goto is inserted, so prog ctrl will skip over this function */
4404  skip_goto_posn = prog_tokens->count() - 2;
4405  prog_tokens->atPut1( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), skip_goto_posn );
4406  /* GOTO, STATEMENT */
4407 
4408  program->symbols.append( userfunc.name.c_str(), posn );
4409  program->append( StoredToken( Mod_Basic, TOK_USERFUNC, TYP_USERFUNC, posn ) );
4410  }
4411  else
4412  {
  // Without debug info there is no statement marker to move: append the
  // skip GOTO and take the next position as the function's address.
4413  program->append( StoredToken( Mod_Basic, RSV_GOTO, TYP_RESERVED, 0 ), &skip_goto_posn );
4414  userfunc.position = posn = prog_tokens->count();
4415  }
4416 
4417  program->enterfunction();
4419 
  // Parameters are walked from last to first; each one gets a symbol, an
  // emitted token and a local variable slot.
4420  for ( int i = static_cast<unsigned int>( userfunc.parameters.size() - 1 ); i >= 0; --i )
4421  {
4422  UserParam* params = &userfunc.parameters[i];
4423  program->symbols.append( params->name.c_str(), posn );
4424  program->append(
4426  TYP_OPERATOR, posn ),
4427  0 );
4428 
4429  program->addlocalvar( params->name );
4430  localscope.addvar( params->name, ctx, true, params->unused );
4431  }
4432 
4433  Token endblock_tkn;
4434  res = readblock( ctx, 1, RSV_ENDFUNCTION, NULL, &endblock_tkn );
4435  if ( res < 0 )
4436  {
4437  INFO_PRINT << "Error occurred reading function body for '" << userfunc.name << "'\n"
4438  << "Function location: " << save_ctx << "Error location: \n";
4439  return res;
4440  }
4441 
4442  program->update_dbg_pos( endblock_tkn );
  // If the body did not end on a RETURN token, an empty-string operand is
  // emitted here (the token following it is on a missing listing line).
4443  StoredToken tmp;
4444  prog_tokens->atGet1( prog_tokens->count() - 1, tmp );
4445  if ( tmp.id != RSV_RETURN )
4446  {
4447  program->symbols.append( "", posn );
4448  program->append( StoredToken( Mod_Basic, TOK_STRING, TYP_OPERAND, posn ) );
4450  }
4451 
4452  leaveblock( 0, 0 );
4453  program->leavefunction();
4454 
4455  /* now, the skip goto must be patched up with the correct PC address*/
4456  patchoffset( skip_goto_posn, prog_tokens->next() );
4457 
4458 
4459  inFunction = 0;
4460  return 0;
4461 }
4462 
4463 // pass 2 function: just skip past, to the ENDFUNCTION.
4464 int Compiler::handleBracketedFunction2( CompilerContext& ctx, int /*level*/, int tokentype )
4465 {
4466  CompilerContext save_ctx( ctx );
4467  int res = -1;
4468  Token tk_funcname;
4469 
4470  if ( tokentype == RSV_EXPORTED )
4471  {
4472  Token tk_function;
4473  res = getToken( ctx, tk_function );
4474  if ( res )
4475  return res;
4476  if ( tk_function.id != RSV_FUNCTION )
4477  {
4478  INFO_PRINT << "Expected 'function' after 'exported'.\n";
4479  return -1;
4480  }
4481  }
4482 
4483  if ( inFunction )
4484  {
4485  INFO_PRINT << "Can't declare a function inside another function.\n";
4486  return -1;
4487  }
4488  getToken( ctx, tk_funcname );
4489  while ( ctx.s[0] )
4490  {
4491  Token token;
4492  res = getToken( ctx, token );
4493  if ( res < 0 )
4494  break;
4495  if ( token.id == RSV_ENDFUNCTION )
4496  {
4497  // we already grabbed the function body.
4498  return 0;
4499  }
4500  }
4501  if ( !ctx.s[0] )
4502  {
4503  INFO_PRINT << "End-of-File detected, expected 'ENDFUNCTION'\n";
4504  return -1;
4505  }
4506 
4507  if ( res < 0 )
4508  {
4509  INFO_PRINT << "Error occurred reading function body for '" << tk_funcname.tokval() << "'\n"
4510  << "Function location: " << save_ctx << "Error location: \n";
4511  return res;
4512  }
4513 
4514  return res;
4515 }
4516 
// Handles the PROGRAM keyword without compiling the body: records the
// declaration line and the current context (program_ctx), then consumes
// tokens up to and including ENDPROGRAM and copies that source text into a
// NUL-terminated heap buffer (program_source).
//
// Only one PROGRAM is allowed per compile; a second one is an error.
// Returns 0 on success, -1 on a duplicate PROGRAM or end of input, or the
// negative getToken() result.
//
// NOTE(review): listing line 4546 (between the NUL termination and the
// "return 0") is not present in this extract.
4517 int Compiler::handleProgram( CompilerContext& ctx, int /*level*/ )
4518 {
  // tk_progname is declared but not used in the lines shown here.
4519  Token tk_progname;
4520  int res;
4521 
4522  if ( haveProgram )
4523  {
4524  INFO_PRINT << "'program' function has already been defined.\n";
4525  return -1;
4526  }
4527  haveProgram = true;
4528  program->program_decl = curLine;
4529  program_ctx = ctx;
4530  const char* program_body_start = ctx.s;
4531  while ( ctx.s[0] )
4532  {
4533  Token token;
4534  res = getToken( ctx, token );
4535  if ( res < 0 )
4536  return res;
4537 
4538  if ( token.id == RSV_ENDPROGRAM )
4539  {
  // ctx.s now points just past ENDPROGRAM; len includes room for the NUL.
4540  const char* program_body_end = ctx.s;
4541  size_t len = program_body_end - program_body_start + 1;
4542  program_source = new char[len];
  // The buffer is registered in delete_these_arrays -- presumably so it is
  // released later; verify against the owner of that container.
4543  delete_these_arrays.push_back( program_source );
4544  memcpy( program_source, program_body_start, len - 1 );
4545  program_source[len - 1] = '\0';
4547  return 0;
4548  }
4549  }
4550  INFO_PRINT << "End of file detected, expected 'endprogram'\n";
4551  return -1;
4552 }
4553 
// NOTE(review): the signature line (listing line 4554) is missing from this
// extract; presumably Compiler::handleProgram2( CompilerContext& ctx, int level ),
// which is how this routine is invoked elsewhere in this file -- TODO confirm.
// Listing line 4572 is absent as well.
//
// Compiles the PROGRAM body: records program_PC, reads the program name and
// its parenthesised argument list (emitting INS_GET_ARG and a local variable
// per argument), then reads statements up to ENDPROGRAM.
// Returns 0 on success, -1 or a helper's negative result on error.
4555 {
4556  Token tk_progname;
4557  int res;
4558 
4559  emitFileLine( ctx );
4560  program->program_PC = program->tokens.count();
4561 
4562  res = getToken( ctx, tk_progname );
4563  if ( res < 0 )
4564  return res;
4565  if ( tk_progname.id != TOK_IDENT )
4566  {
4567  INFO_PRINT << "Error: expected identified after 'program', got '" << tk_progname << "'\n";
4568  return -1;
4569  }
4570 
4571  program->enterfunction();
4573 
4574  res = eatToken( ctx, TOK_LPAREN );
4575  if ( res < 0 )
4576  return res;
  // Argument list: identifiers, each optionally preceded by TOK_UNUSED and
  // optionally followed by a comma, until ')'.
4577  for ( ;; )
4578  {
4579  Token token;
4580  res = getToken( ctx, token );
4581  bool unused = false;
4582  if ( res < 0 )
4583  return res;
4584  if ( res > 0 )
4585  {
4586  INFO_PRINT << "End-of-file reached reading program argument list\n";
4587  return -1;
4588  }
4589  if ( token.id == TOK_UNUSED )
4590  {
4591  unused = true;
4592  res = getToken( ctx, token );
4593  if ( res )
4594  return -1;
4595  }
4596  if ( token.id == TOK_RPAREN )
4597  {
4598  break;
4599  }
4600  else if ( token.id == TOK_IDENT )
4601  {
4602  unsigned varpos;
4603  if ( localscope.varexists( token.tokval(), varpos ) )
4604  {
4605  INFO_PRINT << "Program argument '" << token << "' multiply defined.\n";
4606  return -1;
4607  }
4608  unsigned posn;
4609  program->symbols.append( token.tokval(), posn );
4610  program->append( StoredToken( Mod_Basic, INS_GET_ARG, TYP_OPERATOR, posn ), 0 );
4611  program->addlocalvar( token.tokval() );
4612  localscope.addvar( token.tokval(), ctx, true, unused );
4613 
  // A following comma is consumed if present; it is not required.
4614  res = peekToken( ctx, token );
4615  if ( res < 0 )
4616  return res;
4617  if ( token.id == TOK_COMMA )
4618  getToken( ctx, token );
4619  ++nProgramArgs;
4620  }
4621  else
4622  {
4623  INFO_PRINT << "Expected arguments or right-paren in program arglist, got '" << token << "'\n";
4624  return -1;
4625  }
4626  }
4627  program->haveProgram = true;
4628  program->expectedArgs = nProgramArgs;
4629  Token endblock_tkn;
4630  res = readblock( ctx, level, RSV_ENDPROGRAM, NULL, &endblock_tkn );
4631  if ( res < 0 )
4632  return res;
4633 
4634  program->update_dbg_pos( endblock_tkn );
4635  leaveblock( 0, 0 );
4636  program->leavefunction();
4637 
4638  return 0;
4639 }
4640 
// NOTE(review): the signature line (listing line 4641) is missing from this
// extract and the function name is not visible here; the body uses a
// UserFunction named userfunc and a CompilerContext named ctx. Listing lines
// 4685, 4692 and 4725 are absent as well, so the statements shown here are
// not the complete body.
//
// Emits the code for one user function: an optional three-token stub for
// exported functions, one token per parameter (last parameter first), the
// body read up to ENDFUNCTION, and a TOK_LONG 0 operand when the last
// statement was not a RETURN (the token appended after it is on a missing
// listing line). Finally registers the function's PC range with the program.
// Returns 0 on success or readblock()'s negative result.
//
// NOTE(review): inFunction is set to 1 and only reset on the success path.
4642 {
4643  StoredTokenContainer* program_tokens = &program->tokens;
  // first_PC: position of the first token emitted for this function.
4644  unsigned first_PC = program_tokens->count();
4645 
4646  emitFileLine( ctx );
4647  program->function_decls.resize( program_tokens->count() + 1 );
4648  program->function_decls[program_tokens->count()] = userfunc.declaration;
4649  CompilerContext save_ctx( ctx );
4650  int res;
4651  Token token;
4652 
4653  inFunction = 1;
4654  /*
4655  should be begin, then statements while peektoken != end, then eat end.
4656 
4657  getToken(s, token);
4658  if (token.type != TYP_DELIMITER || token.id != DELIM_SEMICOLON) {
4659  err = PERR_MISSINGDELIM;
4660  return -1;
4661  }
4662  */
4663  /* woo-hoo! recursive calls should work. */
4664  // cout << "func decl: " << curLine << endl;
4665  unsigned posn = 0;
4666 
4667  if ( userfunc.exported )
4668  {
4669  EPExportedFunction ef;
4670  ef.name = userfunc.name;
4671  ef.nargs = static_cast<unsigned int>( userfunc.parameters.size() );
4672  ef.PC = program_tokens->count();
4673  program->exported_functions.push_back( ef );
4674 
  // ef.PC + 3 is the slot right after this three-token stub (assuming each
  // append adds one token), i.e. where userfunc.position is set below.
4675  // insert the stub:
4676  program->append( StoredToken( Mod_Basic, CTRL_MAKELOCAL, TYP_CONTROL, 0 ), ctx );
4677  program->append( StoredToken( Mod_Basic, CTRL_JSR_USERFUNC, TYP_CONTROL, ef.PC + 3 ), ctx );
4678  program->append( StoredToken( Mod_Basic, CTRL_PROGEND, TYP_CONTROL, 0 ), ctx );
4679  }
4680 
4681  userfunc.position = posn = program_tokens->count();
4682 
4683 
4684  program->enterfunction();
4686 
  // Parameters are walked from last to first; each one gets a symbol, an
  // emitted token and a local variable slot.
4687  for ( int i = static_cast<unsigned int>( userfunc.parameters.size() - 1 ); i >= 0; --i )
4688  {
4689  UserParam* params = &userfunc.parameters[i];
4690  program->symbols.append( params->name.c_str(), posn );
4691  program->append(
4693  TYP_OPERATOR, posn ),
4694  save_ctx, 0 );
4695  program->addlocalvar( params->name );
4696  localscope.addvar( params->name, ctx, true, params->unused );
4697  }
4698 
4699  BTokenId last_statement_id;
4700  Token endblock_tkn;
4701  res = readblock( ctx, 1, RSV_ENDFUNCTION, &last_statement_id, &endblock_tkn );
4702  if ( res < 0 )
4703  {
4704  INFO_PRINT << "Error in function '" << userfunc.name << "', " << ctx << "\n";
4705  return res;
4706  }
4707  program->update_dbg_pos( endblock_tkn );
4708 
  // NOTE(review): tmp is fetched here but not read in the lines shown; the
  // decision below uses last_statement_id instead.
4709  StoredToken tmp;
4710  program_tokens->atGet1( program_tokens->count() - 1, tmp );
4711 
4712  /*
4713  This used to check to see if the last instruction was a RETURN;
4714  however, if that's in an ELSE block, that return won't get executed.
4715  This means if the function really does end in a return, this will
4716  generate extra code. The optimizer will have to take care of this,
4717  if it can.
4718  Also note, the "leaveblock" at the end will also generate an instruction
4719  that will never get executed (NEVER!)
4720  */
4721  if ( last_statement_id != RSV_RETURN )
4722  {
4723  program->symbols.append( int( 0 ), posn );
4724  program->append( StoredToken( Mod_Basic, TOK_LONG, TYP_OPERAND, posn ) );
4726  }
4727 
4728  // was leaveblock(0,0)
4729  localscope.popblock();
4730  program->leaveblock();
4731  program->leavefunction();
4732 
4733  unsigned last_PC = program_tokens->count() - 1;
4734  program->addfunction( userfunc.name, first_PC, last_PC );
4735 
4736  inFunction = 0;
4737  return 0;
4738 }
4739 
// NOTE(review): the signature line (listing line 4740) is missing from this
// extract and the function name is not visible here; it takes a
// CompilerContext named ctx.
//
// Reads a function definition without compiling it: parses the optional
// 'exported' prefix and the declaration, then consumes tokens up to and
// including ENDFUNCTION and copies that source text into a NUL-terminated
// heap buffer. The UserFunction, with its context re-pointed at that buffer,
// is stored in userFunctions under its name.
// Returns 0 on success, -1 on a bad declaration or end of input, otherwise
// the non-zero getToken() result.
4741 {
  // save_ctx keeps the starting position for error messages.
4742  CompilerContext save_ctx( ctx );
4743  int res;
4744  Token token;
4745 
4746  UserFunction userfunc;
4747  userfunc.declaration = curLine;
4748 
4749  res = getToken( ctx, token );
4750  if ( res )
4751  return res;
4752  if ( token.id == RSV_EXPORTED )
4753  {
4754  userfunc.exported = true;
4755  res = getToken( ctx, token );
4756  if ( res )
4757  return res;
4758  if ( token.id != RSV_FUNCTION )
4759  {
4760  INFO_PRINT << "Expected 'function' after 'exported'\n";
4761  return -1;
4762  }
4763  }
4764 
4765  if ( readFunctionDeclaration( ctx, userfunc ) )
4766  {
4767  INFO_PRINT << save_ctx;
4768  return -1;
4769  }
4770  userfunc.ctx = ctx;
4771 
4772  const char* function_body_start = ctx.s;
4773  while ( ctx.s[0] )
4774  {
4775  Token _token;
4776  res = getToken( ctx, _token );
4777  if ( res < 0 )
4778  break;
4779  if ( _token.id == RSV_ENDFUNCTION )
4780  {
  // ctx.s now points just past ENDFUNCTION; len includes room for the NUL.
4781  const char* function_body_end = ctx.s;
4782  size_t len = function_body_end - function_body_start + 1;
4783  userfunc.function_body = new char[len];
  // The buffer is registered in delete_these_arrays -- presumably so it is
  // released later; verify against the owner of that container.
4784  delete_these_arrays.push_back( userfunc.function_body );
4785  memcpy( userfunc.function_body, function_body_start, len - 1 );
4786  userfunc.function_body[len - 1] = '\0';
  // The stored context is redirected to the private copy of the body.
4787  userfunc.ctx.s = userfunc.ctx.s_begin = userfunc.function_body;
4788  userFunctions[userfunc.name] = userfunc;
4789  res = 0;
4790  return 0;
4791  }
4792  }
4793  if ( !ctx.s[0] )
4794  {
4795  INFO_PRINT << "End-of-File detected, expected 'ENDFUNCTION'\n" << save_ctx;
4796  return -1;
4797  }
4798 
4799  if ( res < 0 )
4800  {
4801  INFO_PRINT << "Error occurred reading function body for '" << userfunc.name << "'\n"
4802  << "Function location: " << save_ctx << "Error location: \n";
4803  return res;
4804  }
4805  return res;
4806 }
4807 
4808 
4809 void Compiler::patchoffset( unsigned instruc, unsigned newoffset )
4810 {
4811  StoredToken tkn;
4812 
4813  program->tokens.atGet1( instruc, tkn );
4814  tkn.offset = static_cast<unsigned short>( newoffset );
4815  program->tokens.atPut1( tkn, instruc );
4816 }
4817 
4818 
// NOTE(review): the signature line (listing line 4819) is missing from this
// extract; presumably Compiler::compileContext( CompilerContext& ctx ), which
// is how this routine is invoked elsewhere in this file -- TODO confirm.
//
// Compiles statements from ctx until the input is exhausted or a statement
// returns a negative result. Exceptions are logged and rethrown.
// Returns 0 normally and -1 (after printing diagnostics) when the last
// statement result was -1.
4820 {
4821  int res = 0;
4822  // scopes.push( LocalScope() );
4823  // currentscope = &scopes.top();
4824  // currentscope = &scope_;
4825 
4826 
4827  try
4828  {
4829  while ( ( res >= 0 ) && ctx.s[0] )
4830  {
4831  res = getStatement( ctx, 0 );
4832  }
4833  }
4834  catch ( std::exception& )
4835  {
4836  INFO_PRINT << "Exception detected during compilation.\n" << ctx;
4837  throw;
4838  }
4839 
4840  // currentscope = NULL;
4841  // scopes.pop();
4842  // assert( scopes.empty() );
4843 
  // NOTE(review): only res == -1 is reported; any other negative result
  // would fall through to "return 0".
4844  if ( res == -1 )
4845  {
4846  if ( err || ext_err[0] )
4847  {
4848  INFO_PRINT << "Parse Error: " << ParseErrorStr[err];
4849  if ( ext_err[0] )
4850  INFO_PRINT << " " << ext_err;
4851  INFO_PRINT << "\n";
  // The pending parse error is cleared once it has been reported.
4852  err = PERR_NONE;
4853  ext_err[0] = '\0';
4854  }
4855  else
4856  {
4857  INFO_PRINT << "Compilation Error:\n";
4858  }
4859  if ( curLine[0] )
4860  {
4861  INFO_PRINT << "Near: " << curLine << "\n";
4862  }
4863  INFO_PRINT << ctx;
4864  return -1;
4865  }
4866  return 0;
4867 }
4868 
// NOTE(review): the signature line (listing line 4869) is missing from this
// extract and the function name is not visible here; it takes a
// CompilerContext named ctx. Listing line 4920, which presumably declares
// the token `tkn` appended at the end, is absent as well.
//
// Top-level compile of one source context: seeds the symbol table with an
// empty string, loads the "basic" and "basicio" modules, compiles all
// statements, then (if a PROGRAM was seen) compiles the program body from
// program_ctx, and finally appends `tkn`.
// Returns 0 on success or the negative result of the failing step.
4870 {
4871  unsigned dummy = 0;
4872  curSourceFile = 0;
4873  // startPos = ctx.s;
4874  program->symbols.append( "", dummy );
4875 
4876  // see comment by handleUse: may want to only allow use statements at beginning of program->
4877  // useModule( "implicit" );
4878  useModule( "basic" );
4879  useModule( "basicio" );
4880 
4881  int res = 0;
4882  res = compileContext( ctx );
4883 
4884  if ( res < 0 )
4885  return res;
4886  // reinit();
4887 
4888  if ( haveProgram )
4889  {
4890  // the local frame should be empty, so we can use it.
4891  // program->append( StoredToken( Mod_Basic, CTRL_MAKELOCAL, TYP_CONTROL, 0 ) );
  // Any exception from the program body is turned into a -1 result; only
  // std::runtime_error has its message printed.
4892  try
4893  {
4894  res = handleProgram2( program_ctx, 1 );
4895  }
4896  catch ( std::runtime_error& excep )
4897  {
4898  INFO_PRINT << excep.what() << "\n";
4899  res = -1;
4900  }
4901  catch ( ... )
4902  {
4903  res = -1;
4904  }
4905  if ( res < 0 )
4906  {
4907  fmt::Writer _tmp;
4908  _tmp << "Error detected in program body.\n"
4909  << "Error occurred at ";
4910  program_ctx.printOnShort( _tmp );
4911  INFO_PRINT << _tmp.str();
4912  // << program_ctx;
4913  return res;
4914  }
4915 
4916  // program->append( StoredToken( Mod_Basic, CTRL_JSR_USERFUNC, TYP_CONTROL, programPos ) );
4917  }
4918 
4919  unsigned last_posn;
4921  program->append( tkn, &last_posn );
4922 
4923  return 0;
4924 }
4925 
4926 // what am I, too good for stdio/ftell? geez...
4927 // rope getline
4928 int Compiler::getFileContents( const char* file, char** iv )
4929 {
4930 #ifdef _WIN32
4931  // unix does this automatically, duh
4932  // if (1 || check_filecase_)
4933  {
4934  std::string truename = Clib::GetTrueName( file );
4935  std::string filepart = Clib::GetFilePart( file );
4936  if ( truename != filepart && Clib::FileExists( file ) )
4937  {
4938  INFO_PRINT << "Case mismatch: \n"
4939  << " Specified: " << filepart << "\n"
4940  << " Filesystem: " << truename << "\n";
4941  }
4942  }
4943 #endif
4944 
4945 
4946  char* s = NULL;
4947 
4948  FILE* fp = fopen( file, "rb" );
4949  if ( fp == NULL )
4950  return -1;
4951 
4952  // Goes to the end of file
4953  if ( fseek( fp, 0, SEEK_END ) != 0 )
4954  {
4955  fclose( fp );
4956  return -1;
4957  }
4958 
4959  // in order to measure its size
4960  int filelen = ftell( fp );
4961  if ( filelen < 0 )
4962  {
4963  fclose( fp );
4964  return -1;
4965  }
4966 
4967  // and then return to beginning
4968  if ( fseek( fp, 0, SEEK_SET ) != 0 )
4969  {
4970  fclose( fp );
4971  return -1;
4972  }
4973 
4974  s = (char*)calloc( 1, filelen + 1 );
4975  if ( !s )
4976  {
4977  fclose( fp );
4978  return -1;
4979  }
4980 
4981  if (fread( s, filelen, 1, fp ) != 1 )
4982  {
4983  fclose( fp );
4984  return -1;
4985  }
4986 
4987  fclose( fp );
4988  *iv = s;
4989  return 0;
4990 }
4991 
// Body of the routine that pre-scans an included ".inc" file for declarations.
// NOTE(review): the signature line (listing line 4992) is missing from this extract; the body
// uses a parameter named `modulename`.
//
// Path resolution:
//   * names starting with ':' are split with Plib::pkgdef_split();
//       - with a package: <pkg dir>/<path>, falling back to <pkg dir>/include/<path>;
//       - without a package: compilercfg.PolScriptRoot + <path>;
//   * all other names: current_file_path + <name>.inc, falling back to
//     compilercfg.IncludeDirectory + <name>.inc.
// A file whose full path is already in `included` is not read again (returns true).
// Otherwise the file is loaded and scanned with inner_read_function_declarations() while
// current_file_path temporarily points at the included file's directory.
// Returns false if the name cannot be split or the file cannot be read; otherwise returns
// the scan result.
4993 {
4994  std::string filename_part = modulename;
4995  filename_part += ".inc";
4996 
4997  std::string filename_full = current_file_path + filename_part;
4998 
4999  if ( filename_part[0] == ':' )
5000  {
5001  const Plib::Package* pkg = NULL;
5002  std::string path;
5003  if ( Plib::pkgdef_split( filename_part, NULL, &pkg, &path ) )
5004  {
5005  if ( pkg != NULL )
5006  {
      // Primary candidate is the package directory itself; the "include/" subdirectory
      // is only used when the primary candidate does not exist.
5007  filename_full = pkg->dir() + path;
5008  std::string try_filename_full = pkg->dir() + "include/" + path;
5009 
5010  if ( verbosity_level_ >= 10 )
5011  INFO_PRINT << "Searching for " << filename_full << "\n";
5012 
5013  if ( !Clib::FileExists( filename_full.c_str() ) )
5014  {
5015  if ( verbosity_level_ >= 10 )
5016  INFO_PRINT << "Searching for " << try_filename_full << "\n";
5017  if ( Clib::FileExists( try_filename_full.c_str() ) )
5018  {
5019  if ( verbosity_level_ >= 10 )
5020  INFO_PRINT << "Found " << try_filename_full << "\n";
5021 
5022  filename_full = try_filename_full;
5023  }
5024  }
5025  else
5026  {
5027  if ( verbosity_level_ >= 10 )
5028  INFO_PRINT << "Found " << filename_full << "\n";
5029 
      // Both candidates exist: warn (regardless of verbosity) and keep the first one.
5030  if ( Clib::FileExists( try_filename_full.c_str() ) )
5031  INFO_PRINT << "Warning: Found '" << filename_full.c_str() << "' and '"
5032  << try_filename_full.c_str() << "'! Will use first file!\n";
5033  }
5034  }
5035  else
5036  {
      // pkgdef_split() succeeded but returned no package: resolve against PolScriptRoot.
5037  filename_full = compilercfg.PolScriptRoot + path;
5038 
5039  if ( verbosity_level_ >= 10 )
5040  {
5041  INFO_PRINT << "Searching for " << filename_full << "\n";
5042  if ( Clib::FileExists( filename_full.c_str() ) )
5043  INFO_PRINT << "Found " << filename_full << "\n";
5044  }
5045  }
5046  }
5047  else
5048  {
5049  INFO_PRINT << "Unable to read include file '" << modulename << "'\n";
5050  return false;
5051  }
5052  }
5053  else
5054  {
5055  if ( verbosity_level_ >= 10 )
5056  INFO_PRINT << "Searching for " << filename_full << "\n";
5057 
5058  if ( !Clib::FileExists( filename_full.c_str() ) )
5059  {
      // Not next to the including file: try the configured include directory.
5060  std::string try_filename_full = compilercfg.IncludeDirectory + filename_part;
5061  if ( verbosity_level_ >= 10 )
5062  INFO_PRINT << "Searching for " << try_filename_full << "\n";
5063  if ( Clib::FileExists( try_filename_full.c_str() ) )
5064  {
5065  if ( verbosity_level_ >= 10 )
5066  INFO_PRINT << "Found " << try_filename_full << "\n";
5067 
5068  filename_full = try_filename_full;
5069  }
5070  }
5071  else
5072  {
5073  if ( verbosity_level_ >= 10 )
5074  INFO_PRINT << "Found " << filename_full << "\n";
5075  }
5076  }
5077 
      // De-duplicate on the full path so each file is scanned at most once.
5078  std::string filename_check = Clib::FullPath( filename_full.c_str() );
5079  if ( included.count( filename_check ) )
5080  return true;
5081  included.insert( filename_check );
5082 
      // Set by getFileContents() only when it returns 0; released with free() below.
5083  char* orig_mt;
5084 
5085  if ( getFileContents( filename_full.c_str(), &orig_mt ) )
5086  {
5087  INFO_PRINT << "Unable to read include file '" << filename_full << "'\n";
5088  return false;
5089  }
5090 
5091  CompilerContext mod_ctx( filename_full, program->add_dbg_filename( filename_full ), orig_mt );
5092 
      // Nested includes are resolved relative to this file, so switch current_file_path
      // for the duration of the scan and restore it afterwards.
5093  std::string save = current_file_path;
5094  current_file_path = getpathof( filename_full );
5095 
5096  bool res = inner_read_function_declarations( mod_ctx );
5097 
5098  current_file_path = save;
5099 
5100  free( orig_mt );
5101  return res;
5102 }
5103 
// Body of the routine that snapshots the current source line into `curLine`.
// NOTE(review): the signature line (listing line 5104) is missing from this extract; the body
// uses a parameter named `ctx`.
//
// Skips whitespace and comments in `ctx`, copies the text at ctx.s into curLine (bounded by
// sizeof curLine) and cuts the copy at the first '\r' and at the first '\n'.
5105 {
5106  ctx.skipws();
5107  ctx.skipcomments();
5108 
5109  Clib::stracpy( curLine, ctx.s, sizeof curLine );
5110 
5111  char* t;
      // Truncate at the carriage return first, then at the line feed, so either kind of
      // line ending terminates the snapshot.
5112  t = strchr( curLine, '\r' );
5113  if ( t )
5114  t[0] = '\0';
5115  t = strchr( curLine, '\n' );
5116  if ( t )
5117  t[0] = '\0';
5118 }
5119 
// Body of the declaration pre-scan loop.
// NOTE(review): the signature line (listing line 5120) is missing from this extract; the body
// uses a parameter named `ctx`.
//
// Works on a copy of `ctx`, so the caller's context is not advanced. Reads tokens until
// getToken() reports a non-zero result and reacts to these token ids:
//   RSV_CONST                   -> handleConstDeclare()
//   RSV_ENUM                    -> handleEnumDeclare()
//   RSV_FUNCTION / RSV_EXPORTED -> forward_read_function()
//   RSV_INCLUDE_FILE            -> read_function_declarations_in_included_file()
//   RSV_USE_MODULE              -> useModule()
// Every other token is ignored. Returns false as soon as one handler fails, true otherwise.
5121 {
5122  CompilerContext tctx( ctx );
5123  Token tkn;
5124  for ( ;; )
5125  {
5126  readCurLine( tctx );
5127 
      // Remember the position before the token so it can be re-read (see function case).
5128  CompilerContext save_ctx( tctx );
5129  if ( getToken( tctx, tkn ) != 0 )
5130  break;
5131 
5132 
5133  if ( tkn.id == RSV_CONST )
5134  {
5135  if ( handleConstDeclare( tctx ) )
5136  {
5137  INFO_PRINT << "Error in const declaration\n" << tctx;
5138  return false;
5139  }
5140  }
5141  else if ( tkn.id == RSV_ENUM )
5142  {
5143  if ( handleEnumDeclare( tctx ) )
5144  {
5145  INFO_PRINT << "Error in enum declaration\n" << tctx;
5146  return false;
5147  }
5148  }
5149  else if ( tkn.id == RSV_FUNCTION || tkn.id == RSV_EXPORTED )
5150  {
      // Rewind so forward_read_function() starts at the keyword that was just consumed.
5151  tctx = save_ctx;
5152  if ( forward_read_function( tctx ) )
5153  {
5154  INFO_PRINT << "Error reading function\n" << tctx;
5155  return false;
5156  }
5157  }
5158  else if ( tkn.id == RSV_INCLUDE_FILE )
5159  {
5160  Token tk_module_name;
5161 
      // A missing or unexpected name token is silently skipped here.
5162  if ( getToken( tctx, tk_module_name ) == 0 &&
5163  ( tk_module_name.id == TOK_IDENT || tk_module_name.id == TOK_STRING ) )
5164  {
5165  if ( !read_function_declarations_in_included_file( tk_module_name.tokval() ) )
5166  {
5167  // read.. prints out an error message
5168  return false;
5169  }
5170  }
5171  }
5172  else if ( tkn.id == RSV_USE_MODULE )
5173  {
5174  Token tk_module_name;
5175  if ( getToken( tctx, tk_module_name ) == 0 &&
5176  ( tk_module_name.id == TOK_IDENT || tk_module_name.id == TOK_STRING ) )
5177  {
5178  int res = useModule( tk_module_name.tokval() );
5179  if ( res < 0 )
5180  return false;
5181  }
5182  }
5183  }
5184  return true;
5185 }
5186 
// Body of the wrapper around the declaration pre-scan.
// NOTE(review): the signature line (listing line 5187) is missing from this extract; the body
// uses a parameter named `ctx`.
//
// Runs inner_read_function_declarations() on `ctx`, then - whether or not the scan
// succeeded - clears `included`, `constants` and the program's modules. Returns the scan result.
5188 {
5189  bool res = inner_read_function_declarations( ctx );
5190  included.clear();
5191  constants.clear();
5192  program->clear_modules();
5193  return res;
5194 }
5195 
// Body of the routine that emits one user function.
// NOTE(review): the signature line (listing line 5196) is missing from this extract; the body
// uses a parameter named `uf`.
//
// Compiles the function through handleBracketedFunction3() using a copy of uf.ctx, so the
// context stored in `uf` is not advanced. On a negative result the function name and the
// context are printed. Returns the result of handleBracketedFunction3().
5197 {
5198  CompilerContext ctx( uf.ctx );
5199  // cout << "emitting " << uf.name << ": " << program->tokens.next() << endl;
5200  int res = handleBracketedFunction3( uf, ctx );
5201  if ( res < 0 )
5202  {
5203  INFO_PRINT << "Error in function '" << uf.name << "'.\n" << ctx;
5204  }
5205  return res;
5206 }
5207 
// Body of the routine that patches the recorded callers of one user function.
// NOTE(review): the signature line (listing line 5208) is missing from this extract; the body
// uses a parameter named `uf`.
//
// Calls patchoffset() for every entry of uf.forward_callers, passing uf.position.
5209 {
5210  for ( unsigned i = 0; i < uf.forward_callers.size(); ++i )
5211  {
5212  patchoffset( uf.forward_callers[i], uf.position );
5213  }
5214 }
5215 
// Body of the routine that emits all required user functions.
// NOTE(review): the signature line (listing line 5216) is missing from this extract.
//
// Phase 1 sweeps userFunctions repeatedly, emitting every not-yet-emitted function that is
// exported, or has forward callers, or any function at all while compiling_include is set.
// The sweep repeats until a full pass emits nothing (presumably because emitting one
// function can register forward callers on others - confirm against
// handleBracketedFunction3).
// Phase 2 patches the callers of every emitted function.
// Returns the first negative emit_function() result, or 0 when everything succeeded.
5217 {
5218  bool any;
5219  do
5220  {
5221  any = false;
5222  for ( auto& elem : userFunctions )
5223  {
5224  UserFunction& uf = elem.second;
5225  if ( ( uf.exported || compiling_include || !uf.forward_callers.empty() ) && !uf.emitted )
5226  {
5227  int res = emit_function( uf );
5228  if ( res < 0 )
5229  return res;
      // Mark as emitted and request another sweep.
5230  uf.emitted = any = true;
5231  }
5232  }
5233  } while ( any );
5234 
5235  for ( auto& elem : userFunctions )
5236  {
5237  UserFunction& uf = elem.second;
5238  if ( uf.emitted )
5239  {
5240  patch_callers( uf );
5241  }
5242  else
5243  {
      // Functions that were never emitted are left alone.
5244  // cout << "not emitted: " << uf.name << endl;
5245  }
5246  }
5247  return 0;
5248 }
5249 
// Body of the routine that rewinds a program to an earlier checkpoint.
// NOTE(review): the signature line (listing line 5250) is missing from this extract; the body
// uses parameters named `prog` and `checkpoint`.
//
// Shrinks the modules, tokens, symbols, source-line, file-line and debug vectors of `prog`
// back to the sizes recorded in `checkpoint`, and drops every forward-caller entry at or
// beyond checkpoint.tokens_count from all user functions.
5251 {
      // Entries of prog.modules are deleted here as they are removed.
5252  while ( prog.modules.size() > checkpoint.module_count )
5253  {
5254  delete prog.modules.back();
5255  prog.modules.pop_back();
5256  }
5257  prog.tokens.setcount( checkpoint.tokens_count );
5258  prog.symbols.setlength( checkpoint.symbols_length );
5259  while ( prog.sourcelines.size() > checkpoint.sourcelines_count )
5260  prog.sourcelines.pop_back();
5261  while ( prog.fileline.size() > checkpoint.fileline_count )
5262  prog.fileline.pop_back();
      // The debug vectors are trimmed in lock-step, using dbg_filenum's size compared
      // against the token count as the loop condition.
5263  while ( prog.dbg_filenum.size() > checkpoint.tokens_count )
5264  {
5265  prog.dbg_filenum.pop_back();
5266  prog.dbg_linenum.pop_back();
5267  prog.dbg_ins_blocks.pop_back();
5268  prog.dbg_ins_statementbegin.pop_back();
5269  prog.statementbegin = false;
5270  }
5271 
5272  for ( auto& elem : userFunctions )
5273  {
5274  UserFunction& uf = elem.second;
      // Only trailing entries are examined; the loop stops at the first entry that lies
      // before the checkpoint.
5275  while ( !uf.forward_callers.empty() && uf.forward_callers.back() >= checkpoint.tokens_count )
5276  {
5277  uf.forward_callers.pop_back();
5278  }
5279  }
5280 }
5281 
/**
 * Tells whether a file name denotes a web script, i.e. ends in ".hsr" or ".asp".
 *
 * The comparison is case-sensitive and looks only at the end of the name, so an extension
 * that merely appears earlier in the path (for example in a directory name) neither
 * triggers nor blocks a match.
 *
 * @param file  NUL-terminated file name or path; must not be null.
 * @return true if `file` ends in one of the web-script extensions, false otherwise.
 */
bool is_web_script( const char* file )
{
  static const char* const web_extensions[] = {".hsr", ".asp"};

  const size_t file_len = strlen( file );
  for ( const char* ext : web_extensions )
  {
    const size_t ext_len = strlen( ext );
    // Compare the tail of the name directly; searching for the first occurrence would
    // miss "old.hsr/new.hsr", where the first hit is not at the end.
    if ( file_len >= ext_len && memcmp( file + file_len - ext_len, ext, ext_len ) == 0 )
      return true;
  }
  return false;
}
5295 
// Body of the routine that turns a web page with embedded script into plain script text.
// NOTE(review): the signature line (listing line 5299) is missing from this extract; the body
// uses a parameter named `fc` and replaces its contents in place.
//
// The output starts with "use http;". Text outside "<% ... %>" is collected and emitted as
// WriteHtmlRaw( "..." ); statements, with '"' escaped, '\r' dropped and '\n' written as the
// two characters \n. Text inside "<% ... %>" is copied unchanged. A block opened with "<%="
// is wrapped as WriteHtmlRaw( <expression> );
5300 {
5301  std::string output;
5302  output = "use http;";
5303  output += '\n';
5304 
5305  bool reading_html = true;
5306  bool source_is_emit = false;
5307  const char* s = fc.contents();
      // Pending HTML text that has not yet been written out as a WriteHtmlRaw call.
5308  std::string acc;
5309  while ( *s )
5310  {
5311  if ( reading_html )
5312  {
5313  if ( s[0] == '<' && s[1] == '%' )
5314  {
      // Start of a script block: flush the HTML gathered so far.
5315  reading_html = false;
5316  if ( !acc.empty() )
5317  {
5318  output += "WriteHtmlRaw( \"" + acc + "\");\n";
5319  acc = "";
5320  }
5321  s += 2;
      // "<%=" marks an expression whose value is written to the page.
5322  source_is_emit = ( s[0] == '=' );
5323  if ( source_is_emit )
5324  {
5325  output += "WriteHtmlRaw( ";
5326  ++s;
5327  }
5328  }
5329  else
5330  {
      // Escape the character so it can sit inside a double-quoted string literal.
5331  if ( *s == '\"' )
5332  acc += "\\\"";
5333  else if ( *s == '\r' )
5334  ;
5335  else if ( *s == '\n' )
5336  acc += "\\n";
5337  else
5338  acc += *s;
5339  ++s;
5340  }
5341  }
5342  else
5343  {
5344  if ( s[0] == '%' && s[1] == '>' )
5345  {
      // End of the script block; close the call opened for "<%=".
5346  reading_html = true;
5347  s += 2;
5348  if ( source_is_emit )
5349  output += " );\n";
5350  }
5351  else
5352  {
5353  output += *s++;
5354  }
5355  }
5356  }
      // Flush HTML that follows the last script block.
5357  if ( !acc.empty() )
5358  output += "WriteHtmlRaw( \"" + acc + "\");\n";
5359  fc.set_contents( output );
5360 }
5361 
5362 
5369 int Compiler::compileFile( const char* in_file )
5370 {
5371  int res = -1;
5372  try
5373  {
5374  std::string filepath = Clib::FullPath( in_file );
5375  referencedPathnames.push_back( filepath );
5376  current_file_path = getpathof( filepath );
5377  if ( verbosity_level_ >= 11 )
5378  INFO_PRINT << "cfp: " << current_file_path << "\n";
5379  Clib::FileContents fc( filepath.c_str() );
5380 
5381  if ( is_web_script( filepath.c_str() ) )
<