ht's Scheme Interpreter  1.0
a simplified scheme interpreter implementation
tokenizer.cpp
Go to the documentation of this file.
1 #include "tokenizer.hpp"
2 #include "types.hpp"
3 #include <string>
4 #include <vector>
5 #include <list>
6 #include <cstddef>
7 #include <algorithm>
8 #include <iostream>
9 
namespace
{
    // Returns false when the character at `pos` is the payload of a Scheme
    // character literal, i.e. it is immediately preceded by the two-character
    // prefix "#\" (as in #\( or #\"). Returns true otherwise, including when
    // fewer than two characters precede `pos`.
    //
    // The prefix is matched in source order: '#' at pos-2, '\' at pos-1.
    // (Checking them the other way round would match "\#x", which is not a
    // character literal, and would let "#\(" be split as a parenthesis.)
    inline bool notSpecialChar(const std::string& s, std::size_t pos)
    {
        return pos < 2 || s[pos - 2] != '#' || s[pos - 1] != '\\';
    }

    // True when s[pos] equals `c` and that character is not the payload of a
    // character literal (see notSpecialChar). `pos` must be a valid index.
    inline bool isChar(const std::string& s, std::size_t pos, char c)
    {
        return s[pos] == c && notSpecialChar(s, pos);
    }

    // Builds a one-character string from `c`.
    inline std::string char2Str(char c)
    {
        return std::string(1, c);
    }

    // Predicate used to drop empty tokens: true for an empty string.
    inline bool cond(const std::string& s)
    {
        return s.empty();
    }
}
17 
18 Tokenizer::Tokenizer(const std::vector<std::string>& lines)
19 {
20  split(lines);
21 }
22 
23 void Tokenizer::split(const std::vector<std::string>& lines)
24 {
25  bool inStr = false;
26  rawTokens.clear();
27  rawTokens.push_back("");
28  for (size_t i=0; i<lines.size(); ++i)
29  {
30  size_t pos=0;
31  //std::cout<<" begin of "<<i<<" "<<inStr<<std::endl;
32  while (true)
33  {
34  if (pos>=lines[i].length())
35  break;
36  if (inStr)
37  {
38  while(pos<lines[i].size() && !isChar(lines[i], pos, '"'))
39  rawTokens.rbegin() -> push_back(lines[i][pos++]);
40  if (pos<lines[i].size())
41  {
42  rawTokens.rbegin() -> push_back('"');
43  inStr = false;
44  rawTokens.push_back("");
45  ++pos;
46  }
47  }
48  else
49  {
50 
51  while (pos<lines[i].length() && (lines[i][pos]==' ' || lines[i][pos]=='\t'))
52  ++pos;
53  size_t until = std::min( lines[i].find(' ', pos), lines[i].find('\t', pos));
54  std::string t;
55  if (until == lines[i].npos)
56  t=lines[i].substr(pos);
57  else
58  t=lines[i].substr(pos, until-pos);
59 
60  //std::cout<< "LOG: big token |"<<t<<"| from "<<pos <<" to "<<until<<" instr:"<<inStr<<std::endl;
61  pos = until;
62 
63  if (t!="")
64  for(size_t j=0; j<t.size(); ++j)
65  {
66  if (inStr)
67  {
68  rawTokens.rbegin() -> push_back(t[j]);
69  if (isChar(t, j, '"'))
70  inStr = false;
71  } else
72  {
73  if (isChar(t, j, '(') || isChar(t, j, ')'))
74  {
75  rawTokens.push_back(char2Str(t[j]));
76  rawTokens.push_back("");
77  }
78  else if (isChar(t, j, '"'))
79  {
80  rawTokens.push_back(char2Str(t[j]));
81  inStr = true;
82  }
83  else
84  rawTokens.rbegin() -> push_back(t[j]);
85  }
86 
87  }
88 
89  if (!inStr)rawTokens.push_back("");
90  } //if (inStr) else
91  } //while
92  if (inStr) rawTokens.rbegin() -> push_back('\n'); else rawTokens.push_back("");
93  } //for
94  rawTokens.remove_if(cond);
95 
96 }//function
Tokenizer(const std::vector< std::string > &lines)
Definition: tokenizer.cpp:18
void split(const std::vector< std::string > &lines)
Definition: tokenizer.cpp:23
std::list< std::string > rawTokens
Definition: tokenizer.hpp:12