File Language.hpp
File List > source > Worlds > Language.hpp
Go to the documentation of this file
#pragma once
// Debug
#include <iostream>
#include <functional>
#include <tao/pegtl.hpp>
#include <tao/pegtl/contrib/parse_tree.hpp>
#include "core/EasyLogging.hpp"
using clogged::Logger;
using clogged::Team;
using clogged::LogLevel;
namespace pegtl = tao::pegtl;
namespace worldlang{
/// Removes the whitespace the grammar has no rules for.
///
/// Every whitespace character except '\n' is dropped, unless it sits inside a
/// double-quoted string literal. Newlines are always kept because the grammar
/// uses them as statement terminators.
///
/// A '"' that appears inside a '#' comment (from '#' to the end of the line)
/// is not treated as a string delimiter; otherwise a single apostrophe-like
/// quote in a comment would switch stripping off for the rest of the program.
/// A '#' inside a string literal does not start a comment.
///
/// @param input Raw program text.
/// @return A copy of @p input with the insignificant whitespace removed.
inline std::string stripWhitespace(const std::string& input) {
    std::string result;
    result.reserve(input.size());
    bool insideString = false;
    bool insideComment = false;
    for (const char c : input) {
        if (c == '\n') {
            // A comment never extends past the end of its line.
            insideComment = false;
        } else if (!insideString && c == '#') {
            insideComment = true;
        } else if (!insideComment && c == '"') {
            insideString = !insideString;
        }
        // std::isspace requires a value representable as unsigned char;
        // passing a negative char (any non-ASCII byte) is undefined behaviour.
        // https://en.cppreference.com/w/cpp/string/byte/isspace
        const bool isBlank = c != '\n' && std::isspace(static_cast<unsigned char>(c));
        if (!insideString && isBlank) {
            // Skip whitespace outside of strings, excluding newlines
            continue;
        }
        result += c;
    }
    return result;
}
// Numeric literal: optional leading minus, one or more digits, and an
// optional fractional part made of a dot followed by one or more digits.
// Equivalent regex: -?[0-9]+(\.[0-9]+)?
struct number : pegtl::seq<
    pegtl::opt< pegtl::one< '-' > >,
    pegtl::plus< pegtl::digit >,
    pegtl::opt< pegtl::seq< pegtl::one< '.' >, pegtl::plus< pegtl::digit > > >
> {};
// String literal: an opening double quote, then everything up to and
// including the next double quote. There is no escape syntax, so a literal
// cannot contain a double quote itself.
struct string : pegtl::seq<
    pegtl::one< '"' >,
    pegtl::until< pegtl::one< '"' > >
> {};
// C-style, case-sensitive identifier: [_a-zA-Z][_a-zA-Z0-9]*
// Wrapped in its own rule type so it can be selected for the parse tree.
struct identifier : pegtl::identifier {};
// identifier list
// Contains one or more identifiers.
// This matches expressions such as
// "name" or "a,b,c".
struct identifier_list;
struct identifier_list : pegtl::sor<
pegtl::seq<
identifier,
pegtl::one< ',' >,
identifier_list
>,
identifier
>
{};
// Operators that share the priority of addition: '+' and '-'.
struct op_prio_add : pegtl::one< '+', '-' > {};
// Operators that share the priority of multiplication: '*' and '/'.
struct op_prio_mul : pegtl::one< '*', '/' > {};
// Operators that share the priority of comparison.
// The two-character operators are listed before '<' and '>' because the
// choice is ordered: "<=" must be tried before its one-character prefix "<".
struct op_prio_comp : pegtl::sor<
    pegtl::string< '=', '=' >,
    pegtl::string< '!', '=' >,
    pegtl::string< '<', '=' >,
    pegtl::string< '>', '=' >,
    pegtl::one< '<' >,
    pegtl::one< '>' >
> {};
// Expressions and expression lists are used by the rules below before they
// are defined (the grammar is recursive), so they are declared here first.
struct expression;
struct expression_list;
// Function call: an identifier followed by a parenthesised argument list,
// which may be empty. Examples: "id(arg1,arg2,...)" and "id()".
struct function : pegtl::seq<
    identifier,
    pegtl::one< '(' >, pegtl::opt< expression_list >, pegtl::one< ')' >
> {};
// Match an expression or element representing a single value
// examples: 5,(-38),myvar1,myfunc1(7)
// Note that functions can technically return multiple values or none,
// so this isn't perfect.
// Expressions within parentheses is also counted as an element to allow
// for recursion, so "(3+5+8)" is a valid element
struct element : pegtl::sor<
function,
identifier,
number,
string,
pegtl::seq<
pegtl::one< '(' >,
expression,
pegtl::one< ')' >
>
>
{};
// Matches one multiplication expression or a single element
// "a*b" or "c"
struct mul_a : pegtl::sor<
pegtl::seq<
element,
op_prio_mul,
element
>,
element
>
{};
struct mul;
// Recursively matches multiplication operations
// Ex. "1*4", "a*b*c"
struct mul : pegtl::sor<
pegtl::seq<
mul_a,
op_prio_mul,
mul
>,
mul_a
>
{};
// Matches one addition expression or a multiplication subexpression
// "a+b" or "a+b*c", with "b*c" as the subexpression
struct add_a : pegtl::sor<
pegtl::seq<
mul,
op_prio_add,
mul
>,
mul
>
{};
// Matches entire addition expression
struct add;
struct add : pegtl::sor<
pegtl::seq<
add_a,
op_prio_add,
add
>,
add_a
>
{};
struct comp_a : pegtl::sor<
pegtl::seq<
add,
op_prio_comp,
add
>,
add
>
{};
// Matches entire comparison expression
struct comp;
struct comp : pegtl::sor<
pegtl::seq<
comp_a,
op_prio_comp,
comp
>,
comp_a
>
{};
// Top-level expression rule: anything intended to evaluate to a single value.
// It delegates to `comp`, the lowest-priority operator level.
struct expression : pegtl::sor< comp > {};
struct expression_list;
// A list of expressions. Can contain one or more expressions (not zero!)
// "123,345+456,func(3)+6*8"
struct expression_list : pegtl::sor<
pegtl::seq<
expression,
pegtl::one< ',' >,
expression_list
>,
expression
>
{};
// Assignment: a list of target identifiers, '=', and a list of value
// expressions. Examples: "var=value", "var,vars=val1,val2".
// The grammar does not check that both sides have the same length.
struct assignment : pegtl::seq< identifier_list, pegtl::one< '=' >, expression_list > {};
// Comment: '#' and everything after it up to the end of the line.
//   # Comment text goes here
// The terminator is `eolf` (end of line OR end of input) so that a comment on
// the last line of a program without a trailing newline is still accepted;
// with plain `eol` such a program fails to parse.
struct comment : pegtl::seq<
    pegtl::one< '#' >,
    pegtl::until< pegtl::eolf >
> {};
struct statement_list;
// Matches a block of code: '{', a line break, one or more statements, '}'.
// ex. { /*code*/ }
// The opening brace must be followed by a line break. The closing brace may be
// followed by a line break or by the end of the input (`eolf`), so a program
// whose final character is '}' is accepted.
struct code_block : pegtl::seq<
    pegtl::one< '{' >,
    pegtl::eol,
    statement_list,
    pegtl::one< '}' >,
    pegtl::eolf
> {};
// One statement. In order of preference:
//  - a function call, optionally followed by a code block, or an assignment;
//    either may be followed by a line break;
//  - a comment;
//  - an empty line.
// The call is tried before the assignment because the choice is ordered.
struct statement : pegtl::sor<
    pegtl::seq<
        pegtl::sor<
            pegtl::seq< function, pegtl::opt< code_block > >,
            assignment
        >,
        pegtl::opt< pegtl::eol >
    >,
    comment,
    pegtl::eol
> {};
struct statement_list;
// function()\nfunction()...
struct statement_list : pegtl::sor<
pegtl::seq<
statement,
statement_list
>,
statement
>
{};
// Grammar entry point: a statement list that must be followed by a line break
// or the end of the input, so trailing unparsed text makes the parse fail.
struct program : pegtl::seq< statement_list, pegtl::eolf > {};
template< typename Rule >
using selector = tao::pegtl::parse_tree::selector< Rule,
tao::pegtl::parse_tree::store_content::on<
number,
string,
function,
identifier,
identifier_list,
expression,
expression_list,
statement,
statement_list,
code_block,
program,
assignment,
op_prio_add,
op_prio_mul,
op_prio_comp
>,
tao::pegtl::parse_tree::fold_one::on<
add_a,
mul_a
>
>;
/// One element of the flat code sequence produced by parse_to_code.
struct Unit {
    /// What kind of element `value` holds.
    enum class Type {
        number,         // numeric literal text
        string,         // string literal text, without the quotes
        identifier,     // variable name
        function,       // name of a function being called
        operation,      // operator or marker such as "=", "endargs", "endline"
        function_decl,  // name of a function being defined
    };

    Type type;          // kind of this unit
    std::string value;  // text associated with the unit
};
std::vector<Unit> parse_to_code(std::string program){
program = stripWhitespace(program);
pegtl::string_input in(program, "program");
std::vector<Unit> out{};
auto log = Logger::Log() << Team::TEAM_4 << LogLevel::DEBUG;
log << "Entering parser" << std::endl;
std::function<void(const std::unique_ptr<pegtl::parse_tree::node>&)> traverse =
[&out, &traverse, &log](const std::unique_ptr<pegtl::parse_tree::node>& node) -> void{
const std::string_view& type = node->type;
// visit:
if (type == "worldlang::number"){
out.push_back(Unit{Unit::Type::number, node->string()});
} else if (type == "worldlang::identifier"){
out.push_back(Unit{Unit::Type::identifier, node->string()});
} else if (type == "worldlang::string"){
// trim quotes off
out.push_back(Unit{Unit::Type::string, node->string().substr(1,node->string().size()-2)});
} else if (type == "worldlang::function"){
// (operator_endargs) arg arg arg function_name
out.push_back(Unit{Unit::Type::operation, "endargs"});
if (node->children.size() > 1)
traverse(node->children[1]);
// out.push_back(Unit{Unit::Type::number, std::to_string(argcount)});
out.push_back(Unit{Unit::Type::function, node->children.at(0)->string()});
} else if (type == "worldlang::assignment"){
// identifier_list
// auto identifier_count = count(node->children.at(0));
out.push_back(Unit{Unit::Type::operation, "endargs"});
traverse(node->children.at(0));
// value
// Can't check expression count here: must be done at runtime
// since functions may have multiple returns
out.push_back(Unit{Unit::Type::operation, "endargs"});
traverse(node->children.at(1));
// expression
// out.push_back(Unit{Unit::Type::number, std::to_string(identifier_count)});
out.push_back(Unit{Unit::Type::operation, "="});
} else if (type == "worldlang::mul_a"
|| type == "worldlang::add_a"
|| type == "worldlang::expression"){
traverse(node->children.at(0));
if (node->children.size() > 1){
traverse(node->children.at(2));
out.push_back(Unit{Unit::Type::operation, node->children.at(1)->string()});
}
} else if (type == "worldlang::expression_list"
|| type == "worldlang::identifier_list"){
log << "Traversing " << type << "\n";
if (node->children.size()){
for (const auto& c : node->children){
traverse(c);
}
}
} else if (type == "worldlang::code_block"){
log << "Traversing " << type << "\n";
out.push_back(Unit{Unit::Type::operation, "start_block"});
traverse(node->children.at(0));
out.push_back(Unit{Unit::Type::operation, "end_block"});
} else if (type == "worldlang::statement"){
if (node->children.size()){
// auto value = node->children.at(0)->string();
auto& first = node->children.at(0);
if (first->type == "worldlang::function"
&& first->children.at(0)->string() != "if"
&& first->children.at(0)->string() != "for"
&& node->children.size() > 1
&& node->children.at(1)->type == "worldlang::code_block"){
// encode differently here to use for definitions
// <values> . a b c d e funcname(decl)
// funcname assigns variables, then jumps to code
traverse(node->children.at(0)); // function
out.back().type = Unit::Type::function_decl;
traverse(node->children.at(1)); // function code
} else {
for (const auto& child : node->children){
traverse(child);
}
}
}
out.push_back(Unit{Unit::Type::operation, "endline"});
} else {
log << "Type: " << type;
if (node->has_content())
log << " Content: " << node->string();
log << std::endl;
// visit all children
for (const auto& child : node->children){
traverse(child);
}
}
};
auto root = pegtl::parse_tree::parse < worldlang::program, worldlang::selector > (in);
if (root){
traverse(root->children[0]);
return out;
} else {
// parse error lol
log << LogLevel::WARNING << "Parse error!!" << std::endl;
return {};
}
}
} //worldlang