--[[ Copyright (C) 2021 Momi-g

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
--]]
local u = require('util');	--s = pretty.write(t) pretty.dump(t)
local ffi = require('ffi')
--require("strict")
local ctx= {}
-- module table: the functions defined below are stored as its fields


-- Build a positioned error message for the source text `str`.
-- All line ends in `str` are expected to be converted to "\n" beforehand.
--   str: full source text
--   pos: offset of the byte right after the offending text
--   msg: description appended after the location
-- The text "errpos LINE,COL:<line with an '@' inserted at COL>: msg" is
-- formatted with u.fprintf and handed to u.fperr; the first value u.fperr
-- returns is the result.
function ctx.srcemsg(str, pos, msg)
	-- line number = number of newlines in front of pos, plus one
	local _, newlines = string.gsub(string.sub(str, 1, pos - 1), "\n", "")
	local lineno = (newlines or 1) + 1

	-- walk the newline-terminated lines up to the target line and add up the
	-- lengths of the lines that were skipped on the way
	local line, skipped, idx = "", 0, 1
	for chunk in string.gmatch(str, "[^\n]*[\n]") do
		line = chunk
		if idx == lineno then break end
		idx = idx + 1
		skipped = skipped + #chunk
	end

	-- column inside the line; the '@' marker is inserted right there
	local col = pos - skipped
	local marked = string.sub(line, 1, col - 1) .. "@" .. string.sub(line, col, -1)
	local text = u.fprintf(nil, "errpos %d,%d:%s: %s", lineno, col, marked, msg)
	local emsg = u.fperr(nil, text, 1)
	return emsg
end

-- name of the sentinel rule that ctx.src2str appends to the rule source
local SENTINEL_RNAME="%R_OPT"
-- Tear-off helper: try the Lua pattern `reg` on `str` starting at `pos`.
-- Returns three values:
--   the matched text (nil when the pattern does not match),
--   `pos` advanced by the length of that text (unchanged when nothing matched),
--   `ctg` passed through untouched.
function ctx.f_tof(str, reg, pos, ctg)
	local hit = string.match(str, reg, pos)
	if hit == nil then
		return nil, pos, ctg
	end
	return hit, pos + #hit, ctg
end

-- Cut out the body of a delimited token ('..', "..", [..] and the like).
--   str:  source text
--   pos:  position of the first byte after the opening delimiter
--   resc: Lua pattern of the escape character, e.g. "[\\]"
--   rend: Lua pattern of the closing delimiter, e.g. "[']"
--   ctg:  category handed back on success
-- Returns body, next position, ctg. The closing delimiter is not part of
-- the body and the next position points just behind it.
-- When ctx.f_blkend finds no closing delimiter the result is
-- its error message, the unchanged `pos` and the category "err".
function ctx.f_blkstr(str, pos, resc, rend, ctg)
	local close_s, close_e = ctx.f_blkend(str, pos, resc, rend)
	if close_s == nil then
		-- second value of f_blkend is the error text in this case
		return close_e, pos, "err"
	end
	return string.sub(str, pos, close_s - 1), close_e + 1, ctg
end

-- Locate the closing delimiter of a block, honouring an escape character.
--   str:  text to search
--   pos:  position where the search starts
--   resc: Lua pattern of the escape; nil or "" means "no escape handling"
--   rend: Lua pattern of the closing delimiter
-- Returns the start and end index of the first match of `rend` that is not
-- preceded by an earlier escape, or nil plus an error message when `rend`
-- cannot be found.
function ctx.f_blkend(str, pos, resc, rend)
	resc = resc or ""
	local cur = pos - 1
	local beyond = string.len(str) + 1	-- stand-in position for "no escape ahead"
	while true do
		cur = cur + 1
		local esc_at = string.find(str, resc, cur)
		if esc_at == nil or resc == "" then esc_at = beyond end
		local end_s, end_e = string.find(str, rend, cur)
		if end_s == nil then
			return nil, "closing symbol is not found: " .. rend
		end
		if esc_at >= end_s then
			return end_s, end_e
		end
		-- an escape comes first: step over it here, the increment at the loop
		-- top then steps over the escaped character as well
		cur = esc_at + 1
	end
end

-- Normalise the inside of a regex bracket class that is written with
-- \uXXXX / \UXXXXXXXX escapes (the surrounding [] already removed).
-- The characters '-', '[' and '^' are special inside [], so they are only
-- moved to safe places: '-' goes to the front, '[' and '^' go to the back.
-- A range that starts at U+005E is rewritten to start at U+005F and the '^'
-- is re-added at the back. A leading '^' (negation) is kept as it is.
-- Returns the rebuilt class including the brackets, "\\^" when the class is
-- exactly the single character U+005E, or nil plus a message on bad input.
function ctx.uniclass(str)
	local body = str
	local negate = string.match(str, "^%^") or ""
	if negate ~= "" then body = string.sub(str, 2, -1) end

	-- only \uXXXX, \UXXXXXXXX and '-' may appear
	local h = "[0-9a-fA-F]"
	local leftover = string.gsub(body, "\\U" .. string.rep(h, 8), "")
	leftover = string.gsub(leftover, "\\u" .. string.rep(h, 4), "")
	leftover = string.gsub(leftover, "%-", "")
	if leftover ~= "" then return nil, "reg-uniclass holds badstr: " .. leftover end

	local special = {}	-- '-', '[' and '^' are remembered here and placed at the end
	local out = ""

	-- ranges first
	local range_ptn = "(\\[uU]([0-9a-fA-F]+)%-\\[uU]([0-9a-fA-F]+))"
	for whole, lo_hex, hi_hex in string.gmatch(body, range_ptn) do
		local lo = tonumber(lo_hex, 16)
		local hi = tonumber(hi_hex, 16)
		if lo == 0x5e and hi == 0x5e then
			-- the range is just '^'
			special["^"] = "\\u005e"
		else
			if lo == 0x5e then
				-- take '^' out of the range start and remember it
				lo = 0x5f
				special["^"] = "\\u005e"
				whole = "\\u005f-\\U" .. string.format("%08x", hi)
			end
			if lo > 0x10ffff or hi > 0x10ffff then return nil, "unicode num >0x10FFFF: " end
			if lo > hi then return nil, "unicode bad range s>e: " end
			out = out .. whole
		end
	end
	body = string.gsub(body, range_ptn, "")

	-- then the single code points that are left
	for whole, hex in string.gmatch(body, "(\\[uU]([0-9a-fA-F]+))") do
		local code = tonumber(hex, 16)
		if code > 0x10ffff then return nil, "unicode num >0x10FFFF: " end
		if code == 0x2d then
			special["-"] = "\\u002d"
		elseif code == 0x5b then
			special["["] = "\\u005b"
		elseif code == 0x5e then
			special["^"] = "\\u005e"
		else
			out = out .. whole
		end
	end

	out = negate .. (special["-"] or "") .. out .. (special["["] or "") .. (special["^"] or "")
	-- [^] alone cannot be written as a bracket class
	if out == "\\u005e" then return "\\^" end
	return "[" .. out .. "]"
end

-- Turn the inside of a regex bracket class written with octal escapes
-- (\1, \12, \123, ranges like \101-\103, optional leading '^') into an
-- alternation group such as "(\101|\102|\103)".
-- With a leading '^' the group lists every byte 0..255 that was NOT named.
-- Returns the group text ("" when nothing is to be listed), or nil plus a
-- message for a reversed range or for anything that is not an octal escape.
function ctx.octclass(str)
	local negated = string.match(str, "^%^")
	local body = str
	if negated then body = string.sub(str, 2, -1) end

	-- prefix every digit run with 0, then undo it for runs that already had
	-- three digits
	body = string.gsub(body, "([0-9]+)", "0%1")
	body = string.gsub(body, "\\0([0-9][0-9][0-9])", "\\%1")

	local range_ptn = "\\([0-3][0-7]?[0-7]?)%-\\([0-3][0-7]?[0-7]?)"
	local single_ptn = "\\([0-3][0-7]?[0-7]?)"	-- whatever is left after the ranges
	local marked = {}	-- marked[byte] = 1 for every byte named in the class

	for lo_txt, hi_txt in string.gmatch(body, range_ptn) do
		local lo, hi = tonumber(lo_txt, 8), tonumber(hi_txt, 8)
		if hi < lo then
			return nil, "bad reg-octclass range, s>e:" .. lo_txt .. ">" .. hi_txt
		end
		for code = lo, hi do marked[code] = 1 end
	end
	body = string.gsub(body, range_ptn, "")

	for one in string.gmatch(body, single_ptn) do
		marked[tonumber(one, 8)] = 1
	end
	body = string.gsub(body, single_ptn, "")

	-- anything still present was not an octal escape
	if body ~= "" then
		return nil, "reg-octclass holds bad oct: " .. str .. ": " .. body
	end
	if next(marked) == nil then return "" end

	-- list the selected bytes as \ooo joined with '|'
	local want_marked = not negated
	local parts = {}
	for code = 0, 255 do
		if (marked[code] ~= nil) == want_marked then
			parts[#parts + 1] = string.format("\\%03o", code)
		end
	end
	if #parts == 0 then return "" end
	return "(" .. table.concat(parts, "|") .. ")"
end


-- Rewrite one regex bracket class.
--   str: the class including its surrounding brackets
--   ctg: "bre" or "ere"
-- Classes with octal escapes go through ctx.octclass (an error for BRE),
-- classes with \u / \U escapes go through ctx.uniclass, everything else is
-- returned unchanged (brackets re-attached).
-- Returns the rewritten text, or nil plus a message.
function ctx.f_kickoct(str, ctg)
	local inner = string.sub(str, 2, -2)
	if string.match(inner, "\\[0-9]") then
		if ctg == "bre" then
			return nil, "BRE doesnt allow binary class data, [\\nnn]"
		end
		return ctx.octclass(inner)
	end
	if string.match(inner, "\\[uU]") then
		return ctx.uniclass(inner)
	end
	return "[" .. inner .. "]"
end

-- Extract a complete regex bracket class from `str`.
--   str:  text being scanned
--   spos: position of the opening '['
-- Handles a ']' placed first ("[]...]" and "[^]...]") and the inner
-- [: :], [. .] and [= =] constructs, whose closing brackets do not end the
-- class.
-- Returns the class text including both brackets, or nil plus an error
-- message when the class is not closed.
function ctx.getclass(str, spos)
	local cur = spos + 1	-- skip the opening '['
	if string.sub(str, cur, cur + 1) == "^]" then cur = cur + 2 end	-- let "[^]" pass

	-- mark every position that belongs to an inner [: :] / [. .] / [= =]
	local inner = {}
	while true do
		local open = string.find(str, "%[([%.%:%=])", cur)
		local close = string.find(str, "]", cur, true)
		-- stop when there is no opener, when no ']' follows at all (the loop
		-- below then reports the missing closer), or when the class is closed
		-- before the opener
		if open == nil or close == nil or close < open then break end
		cur = open + 2
		local closer = string.sub(str, open + 1, open + 1) .. "]"
		local epos = string.find(str, closer, cur, true)
		if epos == nil then break end
		epos = epos + 1
		for i = open, epos do inner[i] = 1 end
		cur = epos + 1
	end

	-- a ']' directly after '[' is a literal member: "[]" alone is an error,
	-- "[]]" is fine
	if cur == spos + 1 then cur = cur + 1 end

	-- the first ']' that is not part of an inner construct closes the class
	while true do
		cur = string.find(str, "]", cur, true)
		if cur == nil then
			return nil, "closing reg-class symbol not found"
		end
		if inner[cur] == nil then break end
		cur = cur + 1
	end
	return string.sub(str, spos, cur)
end


-- Cut out the body of a regex literal (e/../ e".." e'..' and the b variants).
--   str:  source text
--   pos:  position of the first byte after the opening delimiter
--   resc: Lua pattern of the escape character ("" / nil = none)
--   rend: Lua pattern of the closing delimiter
--   ctg:  "ere" or "bre"; handed back on success
-- While copying the body two things are rewritten:
--   * a bracket class is taken as a whole with ctx.getclass (so a closing
--     delimiter inside [] does not end the literal) and passed through
--     ctx.f_kickoct, which moves octal / unicode members out of the class;
--   * an escaped closing delimiter loses its escape character; every other
--     escape is copied together with the character that follows it and is
--     left for later processing.
-- Returns body, position behind the closing delimiter, ctg.
-- On failure the first value is an error message and the category is "err".
function ctx.f_regstr(str, pos, resc, rend, ctg)
	resc = resc or ""
	local start = pos
	local cur = start - 1
	local out = ""
	local past_end = #str + 1
	while true do
		cur = cur + 1
		local end_s, end_e = string.find(str, rend, cur)	-- closing delimiter
		local esc_s, esc_e = string.find(str, resc, cur)	-- escape character
		if esc_s == nil or resc == "" then esc_s = string.len(str) + 1 end
		local escend_s, escend_e = string.find(str, resc .. rend, cur)	-- escaped delimiter
		local cls_s = string.find(str, "[%[]", cur)	-- start of a bracket class
		if end_s == nil then
			ctg = "err"
			out = "closing symbol is not found: " .. rend .. " " .. string.sub(str, start - 1, start + 10)
			break
		end
		cls_s = cls_s or past_end

		if cls_s < end_s and cls_s < esc_s then
			-- a bracket class comes first: copy the plain text in front of it,
			-- then the rewritten class
			out = out .. string.sub(str, cur, cls_s - 1)
			local cls, cerr = ctx.getclass(str, cls_s)
			if cerr then
				ctg = "err"
				out = cerr
				break
			end
			cur = cls_s + #cls - 1	-- last byte of the class; the loop top adds one
			local grp, gerr = ctx.f_kickoct(cls, ctg)
			if gerr then
				ctg = "err"
				out = gerr
				break
			end
			out = out .. grp
		elseif esc_s < end_s and esc_s < cls_s then
			-- an escape comes first
			out = out .. string.sub(str, cur, esc_s - 1)
			local piece = string.sub(str, esc_s, esc_e + 1)
			local last = esc_e + 1
			if esc_s == escend_s then
				-- escaped closing delimiter: keep the delimiter, drop the escape
				piece = string.sub(str, esc_e + 1, escend_e)
				last = escend_e
			end
			out = out .. piece
			cur = last
		else
			-- the closing delimiter comes first: done
			out = out .. string.sub(str, cur, end_s - 1)
			cur = end_e
			break
		end
	end
	return out, cur + 1, ctg
end

-- Append the sentinel rule (SENTINEL_RNAME) to the PEG source `str`.
-- The option string `exp` selects the action attached to the sentinel:
--   contains "n": { _0 = '' }
--   contains "N": { _E }       (only looked at when "n" is absent)
--   otherwise:    no action
-- Returns the extended source text.
function ctx.src2str(str, exp)
	local tail
	if string.match(exp, "n") then
		tail = " <- . { _0 = '' }\n"
	elseif string.match(exp, "N") then
		tail = " <- . { _E }\n"
	else
		tail = " <- . \n"
	end
	return str .. "\n" .. SENTINEL_RNAME .. tail
end

-- Tokenise PEG source text into a flat list of terms.
--   data: PEG source text
--   exp:  option letters (default ""). "n"/"N" select the sentinel rule that
--         ctx.src2str appends; "r"/"R" enable the extended syntax (regex
--         literals, heredocs, actions, !!. and the extra escapes \a \b \v \f
--         \3xx \uXXXX \UXXXXXXXX). Without "r"/"R" those tokens are rejected.
-- Returns terms, str on success, where str is the text that was scanned
-- (data plus the sentinel rule), or nil, emsg on failure.
-- Every term is a table { ctg=category, data=text, info="ctg:text", pos=n }
-- with pos being the scan position right after the token. For peg classes
-- data is replaced by the 256 byte table produced by ctx.convclass.
function ctx.f_scanpeg(data, exp)
	exp=exp or ""
	local str, emsg = ctx.src2str(data, exp)
	if str==nil then return str, emsg end
	local cur=1	-- seek cursor: always the first byte that is not consumed yet
	local ctg=""	-- category of the token just read
	local mcur=string.len(str)
	local rs=""	-- text of the token just read
	local terms = {}
	local aflg=0	-- nesting depth of action blocks {...}
	local grule=1	-- set while ordinary rules are scanned; cleared at the sentinel rule

	-- Lexer. Each helper returns the token and moves cur behind it.
::lbl_NEXT::
	while cur<=mcur do
		-- run-once loop: "break" means "token recognised"
		while true do
			-- regex literals
			rs=string.sub(str,cur,cur+1)
			if rs=="e/" then rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", "[/]", "ere"); break end
			if rs=='e"' then rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", '["]', "ere"); break end
			if rs=="e'" then rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", "[']", "ere"); break end

			if rs=="b/" then rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", "[/]", "bre"); break end
			if rs=='b"' then rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", '["]', "bre"); break end
			if rs=="b'" then rs,cur,ctg = ctx.f_regstr(str, cur+2, "[\\]", "[']", "bre"); break end

			-- heredoc literals (no escape character)
			if rs=='h"' then rs,cur,ctg = ctx.f_blkstr(str, cur+2, "", '["]', "hlit"); break end
			if rs=="h'" then rs,cur,ctg = ctx.f_blkstr(str, cur+2, "", "[']", "hlit"); break end
			-- literals and peg classes: position / escape pattern / end pattern
			rs=string.sub(str,cur,cur)
			if rs=="'" then rs,cur,ctg = ctx.f_blkstr(str, cur+1, "[\\]", "[']", "lit"); break end
			if rs=='"' then rs,cur,ctg = ctx.f_blkstr(str, cur+1, "[\\]", '["]', "lit"); break end
			if rs=="[" then rs,cur,ctg = ctx.f_blkstr(str, cur+1, "[\\]", '[%]]', "class"); break end

			-- line comments and blanks (newlines count as blanks)
			rs,cur,ctg=ctx.f_tof(str,"^[#][^\n]*[\n]", cur, "skip"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[ \t\r\v\f\n]+", cur, "skip"); if rs then break end

			-- rule names; not looked for inside an action block
			if aflg<=0 then
				rs,cur,ctg=ctx.f_tof(str,"^[_a-zA-Z][_a-zA-Z0-9]*", cur, "ident"); if rs then break end
				-- reaching the sentinel rule ends the ordinary rules: its action
				-- must pass even in original-PEG mode, so grule is cleared
				rs,cur,ctg=ctx.f_tof(str,"^%"..SENTINEL_RNAME, cur, "ident"); if rs then grule=nil; break end
			end
			rs,cur,ctg=ctx.f_tof(str,"^[<][-]", cur, "def"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[/]", cur, "subdef"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[.]", cur, "class"); if rs then break end
			-- beginning-of-file marker
			rs,cur,ctg=ctx.f_tof(str,"^!![.]", cur, "bof"); if rs then break end
			-- symbols ("lit", 'lit' and [cls] were handled above)
			rs,cur,ctg=ctx.f_tof(str,"^[(]", cur, "("); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[)]", cur, ")"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[!]", cur, "pre"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[&]", cur, "pre"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[*]", cur, "suf"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[?]", cur, "suf"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[+]", cur, "suf"); if rs then break end

			-- action block tokens
			rs,cur,ctg=ctx.f_tof(str,"^[{]", cur, "{"); if rs then aflg=aflg+1; break end
			rs,cur,ctg=ctx.f_tof(str,"^[}]", cur, "}"); if rs then aflg=aflg-1; break end
			rs,cur,ctg=ctx.f_tof(str,"^[_]E", cur, "aE"); if rs then break end	-- error symbol
			rs,cur,ctg=ctx.f_tof(str,"^[_][0]", cur, "aC"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[_][1-9][0-9]*", cur, "aC"); if rs then break end
			rs,cur,ctg=ctx.f_tof(str,"^[:]?[=]", cur, "="); if rs then break end	-- assignment
			rs,cur,ctg=ctx.f_tof(str,"^[;]", cur, "sep"); if rs then break end	-- statement separator

			-- anything else is an invalid character
			ctg="err"
			rs="badrule/bad char: "..string.sub(str, cur, cur)
			break
		end
		-- errors
		if aflg<0 then ctg="err"; rs="uneven curly braces" end
		if ctg == "err" then return nil, ctx.srcemsg(str, cur, rs) end
		-- original-PEG mode: no extended tokens, standard escapes only
		if not string.match(exp, "[rR]",1) then
			-- plain substring test against the list of extended categories;
			-- "lit" also hits inside "hlit" and is therefore excluded below.
			-- The sentinel rule is exempt because it may carry an action.
			if string.find("{} aE aC = hlit bre ere sep bof", ctg, 1, true) and grule then
				if ctg~="lit" then
					-- the extra -1 accounts for the closing delimiter
					return nil, ctx.srcemsg(str, cur-string.len(rs)-1, "bad word in orig peg mode: ctg/char ".. ctg.."/"..rs)
				end
			end
			-- reject escapes outside the standard set
			if ctg=="class" or ctg=="lit" then
				local sbuf=rs
				sbuf = string.gsub(sbuf, "\\[\\%[%]nrt'\"]", "")
				sbuf = string.gsub(sbuf, "\\[0-2][0-7][0-7]", "")
				sbuf = string.gsub(sbuf, "\\[0-7][0-7]?", "")
				if string.find(sbuf, "\\", 1, true) then
					return nil, ctx.srcemsg(str, cur-string.len(rs)-1, "bad escchar in orig peg mode: "..sbuf)
				end
			end
		end

		if ctg == "skip" then goto lbl_NEXT end
		local s = rs
		-- rs is the bare body without delimiters, so error positions are
		-- computed backwards from cur
		if ctg=="class" or ctg == "lit" or ctg=="bre" or ctg=="ere" then
			-- resolve escapes, one construct per pass
			local cpos=1
			s=""
			local bstr = rs
			local sbuf=""
			local rcls=0	-- regex only: -1 outside a [] class, 1 inside
			local rg = 0	-- 1 for regex literals
			if ctg=="bre" or ctg=="ere" then rcls= -1; rg=1 end
			while 1 do
			::lb_NEXT::
				if string.len(bstr)<cpos then break end
				-- common character escapes
				if string.match(bstr,'^\\n',cpos) then s=s.."\n";cpos=cpos+2; goto lb_NEXT end
				if string.match(bstr,'^\\r',cpos) then s=s.."\r";cpos=cpos+2; goto lb_NEXT end
				if string.match(bstr,'^\\t',cpos) then s=s.."\t";cpos=cpos+2; goto lb_NEXT end
				-- literal / peg class only; regex escapes are handled below
				if ctg=="class" or ctg=="lit" then
					if string.match(bstr,'^\\"',cpos) then s=s.."\"";cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,"^\\'",cpos) then s=s.."\'";cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,'^\\%[',cpos) then s=s.."["; cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,'^\\%]',cpos) then s=s.."]"; cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,'^\\\\',cpos) then s=s.."\\";cpos=cpos+2; goto lb_NEXT end
				end
				-- regex, outside of a [] class
				if rcls<0 and rg==1 then
					sbuf = string.sub(bstr, cpos, cpos)
					-- the '$' anchor is not accepted
					if sbuf=='$' then
						return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2,
					 "use peg-eof '!.' instead of anchor '$' eg) e/abc$/ >> e/abc/ !.")
					end
					if sbuf=="\\" then
						-- escaped special characters are copied as they are, see
						-- https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03
						sbuf = string.sub(bstr,cpos+1,cpos+1)
						if ctg=="bre" and string.find('^.[$*\\(){}', sbuf, 1, true) then
							s = s.."\\"..sbuf
							cpos = cpos+2
							goto lb_NEXT
						end
						if ctg=="ere" and string.find('^.[$()|*+?{\\', sbuf, 1, true) then
							s = s.."\\"..sbuf
							cpos = cpos+2
							goto lb_NEXT
						end
					end
				end
				-- octal escapes up to \277, and back references \1-\9.
				-- The loop body runs at most once; "break" falls through to the
				-- handling below.
				sbuf = string.match(bstr, '^\\[0-9][0-7]?[0-7]?', cpos)
				while sbuf do
					if ctg=="bre" and rcls<0 and #sbuf==2 and sbuf~="\\0" then
						-- BRE back reference: pass through untouched
						s = s..sbuf
						cpos = cpos+string.len(sbuf)
						goto lb_NEXT

					elseif ctg=="ere" and rcls<0 and #sbuf==2 and sbuf~="\\0" then
						-- ERE has no back references
						return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2,
					 "regex ERE doesnt support back reference: \\1-9")
					end
					if string.match(sbuf, "[89]") then break end	-- \8 and \9 are not octal
					local num = tonumber(string.sub(sbuf, 2,2) )
					if #sbuf==4 and num>2 then break end	-- \300 and above: extended mode only

					num = tonumber(string.sub(sbuf, 2), 8)
					local cstr = string.char(num)
					-- the resulting byte may be special in a regex: escape it
					if ctg=="ere" and rcls<0 and string.find("^.[$()|*+?{\\",cstr,1,true) then cstr="\\"..cstr
					elseif ctg=="bre" and rcls<0 and string.find('^.[$*\\(){}', cstr, 1,true) then cstr="\\"..cstr
					elseif ctg=="class" and cstr=="-" then
						-- an octal hyphen in a peg class must not form a range:
						-- drop it here and put the marker "---" in front of the
						-- class once; ctx.convclass turns the marker into a
						-- literal '-'
						cstr=""
						if string.sub(s, 1, 3)~="---" then s="---"..s end
					end
					s = s..cstr
					cpos = cpos+string.len(sbuf)
					goto lb_NEXT
				end
				-- extended mode: more character escapes, \3xx, unicode
				if string.match(exp, "[rR]",1) then
					if string.match(bstr,'^\\a',cpos) then s=s.."\a";cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,'^\\b',cpos) then s=s.."\b";cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,'^\\v',cpos) then s=s.."\v";cpos=cpos+2; goto lb_NEXT end
					if string.match(bstr,'^\\f',cpos) then s=s.."\f";cpos=cpos+2; goto lb_NEXT end
					-- octal escapes that were left over above
					sbuf = string.match(bstr, '^\\[0-3][0-7]?[0-7]?', cpos)
					if sbuf then
						local num = tonumber( string.sub(sbuf, 2), 8)
						s = s..string.char(num)
						cpos = cpos+string.len(sbuf)
						goto lb_NEXT
					end
					-- unicode: \uXXXX or \UXXXXXXXX
					sbuf = string.match(bstr, '^\\[uU]', cpos)
					if sbuf then
						local c = "[0-9a-fA-F]"
						local u4 = string.match(bstr, '^\\u'..c..c..c..c, cpos)
						local u8 = string.match(bstr, '^\\U'..c..c..c..c..c..c..c..c, cpos)
						sbuf=u4 or u8 or nil
						if sbuf==nil then return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2, "bad unicode len, needs u4/U8") end
						if ctg=="class" then return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2, "peg-class unsupports unicode: "..sbuf) end
						c = u.strconv(sbuf, "uc:b")
						-- escape the result when it is a regex special character
						if ctg=="bre" and rcls<0 and string.find('^.[$*\\(){}', c, 1, true) then c="\\"..c
						elseif ctg=="ere" and rcls<0 and string.find("^.[$()|*+?{\\", c, 1, true) then c="\\"..c end
						s = s..c
						cpos = cpos+string.len(sbuf)
						goto lb_NEXT
					end
				end
				-- whatever is left is an ordinary character
				sbuf = string.sub(bstr, cpos, cpos)
				-- a backslash that got this far is an invalid escape, except
				-- inside a regex [] class
				if rcls~=1 and sbuf=='\\' then
					return nil, ctx.srcemsg(str, cur-(string.len(rs)-cpos)-2,
						"bad charesc: "..string.sub(bstr,cpos,cpos+1) )
				end
				s = s..sbuf
				cpos = cpos+1
				-- track entering / leaving a regex [] class
				if rcls<0 and sbuf=='[' then rcls= 1
				elseif rcls>0 and sbuf==']' then rcls= -1 end
			end
		end
		local term = { ["ctg"]=ctg, ["data"]=s, ["info"]=ctg..":"..s, ["pos"]=cur }
		-- NOTE(review): a bracket class whose body is just "." takes the same
		-- branch as the bare '.' token here - confirm this is intended
		if ctg=="class" and s=="." then term.ctg = "any"
		elseif ctg=="hlit" then term.ctg = "lit"
		elseif ctg=="class" then
			-- class data becomes the byte table built by ctx.convclass;
			-- cinfo is the readable dump on success, the message on failure
			local res, cinfo = ctx.convclass(term.data)
			if res==nil then return nil, ctx.srcemsg(str, cur, cinfo..s) end
			term.data = string.char(u.tb2va(res) )
			term.info = term.info .. " "..cinfo
		end
		terms[#terms+1]=term
		-- put "<-" in front of the rule name it follows, so that one token of
		-- lookahead is enough later on
		if ctg=="def" then terms[#terms], terms[#terms-1] = terms[#terms-1], terms[#terms] end
	end	-- until end of source
	return terms, str
end

-- Convert the body of a peg class into a 256 entry membership table.
--   base: class body with escapes already resolved. A leading "---" or
--         "----" is first replaced by a single "-".
-- Returns rtb, ninfo:
--   rtb:   array with 256 entries; rtb[byte+1] is 64 for a member and 0
--          otherwise (index 1 stands for byte 0)
--   ninfo: the class body written with \ooo escapes; hyphens that are
--          neither first nor last are kept as "-"
-- Returns nil plus a message for a reversed or malformed range.
function ctx.convclass(base)
	local str = string.gsub(base, "^%-%-%-%-?", "-")
	local HYPHEN = string.byte("-")
	-- member mark: 64 ('@') is used rather than 1 so that a dump of the
	-- table stays visible
	local ONE = 64

	-- readable dump of the class body
	local bytes = { string.byte(str, 1, #str) }
	local last = #bytes
	local dump = {}
	for i, b in ipairs(bytes) do
		if b == HYPHEN and i ~= 1 and i ~= last then
			dump[i] = "-"
		else
			dump[i] = string.format("\\%03o", b)
		end
	end
	local ninfo = table.concat(dump)

	local rtb = {}
	for slot = 1, 256 do rtb[slot] = 0 end
	local hyphen_slot = HYPHEN + 1	-- Lua arrays start at 1

	-- literal hyphens: "--" alone, a single leading one, a single trailing one
	if str == "--" then
		rtb[hyphen_slot] = ONE
		str = ""
	end
	if string.find(str, "^%-") and not string.find(str, "^%-%-") then
		rtb[hyphen_slot] = ONE
		str = string.sub(str, 2)
	end
	if string.find(str, "%-$") and not string.find(str, "%-%-$") then
		rtb[hyphen_slot] = ONE
		str = string.sub(str, 1, -2)
	end

	-- ranges x-y
	local range = "(.)-(.)"
	for lo_ch, hi_ch in string.gmatch(str, range) do
		local lo, hi = string.byte(lo_ch), string.byte(hi_ch)
		if hi < lo then
			local buf = u.sprintf("%s(%d)>%s(%d): ", lo_ch, lo, hi_ch, hi)
			return nil, "bad peg-class range, s>e: " .. buf
		end
		for code = lo, hi do rtb[code + 1] = ONE end
	end
	str = string.gsub(str, range, "")
	-- a hyphen that is still present did not belong to a valid range
	if string.find(str, "%-") then
		return nil, "detect bad range in peg-class: " .. str
	end

	-- single characters
	for _, b in ipairs({ string.byte(str, 1, #str) }) do
		rtb[b + 1] = ONE
	end
	return rtb, ninfo
end


-- Make the placeholder "ident" term that stands for a parenthesised group.
--   subcnt: running number of the group; the generated name is "%<subcnt>"
--   term:   the term being replaced; its pos and pre fields are carried over
-- Returns the new term table.
function ctx.subterm(subcnt, term)
	local name = "%" .. subcnt
	return {
		ctg = "ident",
		data = name,
		pos = term.pos,
		info = "ident:" .. name,
		pre = term.pre,
	}
end

-- Check the grammar of the term list and assemble it into rules.
--   bstr:   raw source text, used for error messages only
--   terms:  term list (entries with ctg, data, pos, info). The list is
--           modified: two terms are inserted after every "(" (see below).
--   pos:    index of the last term already consumed; nil/0 for the top-level
--           call, anything else marks a recursive call for a "( ... )" group
--   subcnt: running number for generated group names (default 1)
-- Returns rules, pos, subcnt. The extra values exist because the function
-- recurses for groups; the top-level caller gets the rules in the first value.
-- On error it returns nil, emsg. For a nil or empty term list it returns {}.
-- Each rule is an array of terms; rule[1] is the term naming the rule.
-- Terms may get the extra fields pre ("!" / "&") and suf ("*" / "+" / "?").
-- Categories seen here: ident, lit, class, bre, ere, bof, act, plus any (.)
-- and eof (!.) which are set in this function.
-- Categories inside an action: aC, aE, lit.
-- Original design notes: converting classes to regex turned out slower than
-- the table lookup, so classes are kept; regex pays off when it replaces
-- many peg rules at once.
function ctx.bldrules(bstr, terms, pos, subcnt) 
	if terms==nil  or  #terms == 0 then  return {}  end 	--no rule. blank file etc
	
	pos=pos or 0
	local subflg=1;	if pos==0 then subflg=0 end 	--0: top-level call, 1: inside a group
	local term=nil
	local rtop=nil	--first term of the current rule; reused as head of each alternative
	local pre=nil
	subcnt=subcnt or 1
	local anyhash="" ;for i=1,256 do do anyhash=anyhash.."1" end::_luka_LOOPNEXT::end 
	local zerohash="" ;for i=1,256 do do zerohash=zerohash.."\0" end::_luka_LOOPNEXT::end 

	-- Sequence table: nlist[ctg] holds the categories that may follow ctg.
	-- Anything not listed is an error.
	-- ere, bre and any are looked up as "class" (see the loop below).
	-- Original note: this could probably be rewritten with a lexer generator
	-- or moved to C once the design has settled.
	local nlist = {}	--allow nextctg...+ "act"
	nlist.def	= { ident=1 }	--the scanner already moved "<-" in front of the rule name
	nlist.subdef= { pre=1, ["("]=1, ident=1, lit=1, class=1, bof=1 }
	nlist.ident	= {def=1,subdef=1,pre=1,suf=1,["("]=1,[")"]=1,ident=1,lit=1,class=1,bof=1,["{"]=1 }
	nlist.lit	= nlist.ident
	nlist.class = nlist.ident
	nlist.bof	= nlist.ident
	nlist.any   = nlist.ident
	nlist.ere	= nlist.ident
	nlist.bre	= nlist.ident
	nlist.pre	= { ["("]=1, ident=1, lit=1, class=1, bof=1 }
	nlist.suf	= {def=1,subdef=1,pre=1,["("]=1,[")"]=1,ident=1,lit=1,class=1, bof=1,["{"]=1}
	nlist["("]	= nlist.subdef
	nlist[")"]	= nlist.ident
	
	nlist["{"]	= { aC=1, aE=1, sep=1, ["}"]=1 }
	nlist.aC	= { ["="]=1, aC=1, lit=1, sep=1, ["}"]=1 }
	nlist.aE	= { sep=1, ["}"]=1 }
	nlist["="]	= { aC=1, lit=1 }
	nlist.alit	= { aC=1, lit=1, sep=1, ["}"]=1 }	-- lit...act/rule common atm
	nlist.sep	= { sep=1, aC=1, ["}"]=1 }
	nlist["}"]	= { def=1, subdef=1 }
	
	local rules= {}
	local rule= {}
	local subrules= {}
	
	local abuf= {}	--act listbuf
	local aflg=0	--in act
	--init
	local xctg = {def=1}	--guess neXt ctg

::lbl_NEXT::
	--init: pos=0
	while pos<#terms do do 	
		pos=pos+1
		term=terms[pos]
	--rule_err, pegrule ck. class/any/reg do same work.
		local tmp = term.ctg
		; if tmp=="ere" or tmp=="bre" or tmp=="any" then tmp="class" end 
		if xctg[tmp]==nil then  return nil,ctx.srcemsg(bstr,term.pos,"bad pegrule seq:"..term.ctg..": "..term.data) end 
		xctg=nlist[term.ctg]
		if aflg==1 and term.ctg=="lit" then  xctg=nlist.alit end 	--a lit inside an action has its own follow set

		if  term.ctg=="def" then 
			--the first time this stores an empty dummy rule (removed after the loop)
			rules[#rules+1]=rule
			rule={}
			goto lbl_NEXT
		 end 
		if  term.ctg=="subdef"  then 
			--"/" starts a new alternative: a new rule with the same head term
			rules[#rules+1]=rule
			rule={}
			rule[1]=rtop
			goto lbl_NEXT
		 end 
		if term.ctg=="{" then  aflg=1; goto lbl_NEXT  end 
		if aflg==1 then 
			--action tokens are collected and handed to ctx.f_actrules at "}"
			if term.ctg=="}" then 
				aflg=0
				if #abuf==0 then goto lbl_NEXT end 
				local res, emsg = ctx.f_actrules(rtop, #rules, abuf, nlist, bstr)
				--#rules is passed as the rule number; original note: the dummy
				--entry stored at the first def makes the count come out right
				if res==nil then return res, emsg end 
				rule[#rule+1] = res
				abuf={}
				goto lbl_NEXT
			 end 
			--any other token inside the action block is just buffered
			abuf[#abuf+1]=term
			goto lbl_NEXT
		 end 
		
		--ordinary rule terms. Suffixes are not expanded into extra rules but
		--stored on the term (original note: expanding would blow up the rule data)
		if term.ctg=="pre" then pre=term.data; goto lbl_NEXT end 	-- !&
		if term.ctg=="suf" then 
			rule[#rule].suf=term.data
			goto lbl_NEXT
		 end 	--*+?
		--group: replaced by a generated ident "%n"; the group body becomes a rule of that name
		if term.ctg=="(" then 
			term=ctx.subterm(subcnt, term)	--build the placeholder term
			subcnt=subcnt+1
			
			local tbuf0={ctg="def"}
			local tbuf1=u.tdup(term)
			table.insert(terms, pos+1, tbuf1)
			table.insert(terms, pos+1, tbuf0)	-- terms now continue with: def, %n, group body
			local buf
			buf, pos, subcnt = ctx.bldrules(bstr, terms, pos, subcnt)	--recurse; continues from the returned position
			if buf==nil then return buf, pos end 		--err
			for i=1, #buf do do  subrules[#subrules+1]=buf[i]  end::_luka_LOOPNEXT::end 
			xctg=nlist[")"]		--what may follow the closed group
			term.pre=pre
			pre=nil
			rule[#rule+1]=term
			goto lbl_NEXT
		 end 
		if term.ctg==")"  and  subflg==1  then  do break end  end 
		if term.ctg==")" then  return nil, ctx.srcemsg(bstr, term.pos, "invalid EOF, uneven block()")  end 
		--a ")" at top level has no matching "("
	
		--reg,lit,ident
		--a pending prefix is attached to the term (groups were handled above)
		term.pre=pre
		pre=nil
		--original note: regex matching is heavy, about 70% of the run time with onig
		if term.ctg=="any" and term.pre=="!" then term.ctg="eof";term.pre=nil;term.data=zerohash end 
		if term.ctg=="any" then term.data=anyhash end 
		--elif(term.ctg=="class"&&term.data=="."){term.ctg="any"}
		rule[#rule+1]=term
		if #rule==1 then rtop=term; xctg=nlist.subdef  end 	--after the rule name the narrower follow set applies
		goto lbl_NEXT
	 end::_luka_LOOPNEXT::end 
	--loop_end
	--drop the dummy rule stored at the first def
	table.remove(rules, 1)
	--the rule still being built has not been stored yet
	rules[#rules+1]=rule
	--rules generated for groups go after the rules of this level
	for i=1, #subrules do do  rules[#rules+1]=subrules[i]  end::_luka_LOOPNEXT::end 
	
	--extra term check: a term must not carry both a prefix (!&) and a suffix (*+?)
	for r=1, #rules do do 
		local rule=rules[r]
		for t=1, #rule do do 
			local term=rule[t]
			if  term.pre  and  term.suf then 
			local s = "\nterms using both '!&' and '?*' is valid PEG but must be illegal rule\n" ..
			"   !__*, !__? is always fail same as !''	eg) R <- !'a'*	#>>'a', 'x'>>fail\n"..
			"   &__*, &__? is always suc  same as  ''	eg) R <- &'a'?	#>>'a', 'x'>>suc\n"..
			"\n'!&' and '+' is valid but used only in rare cases, so split the rules if necessary plz\n"..
			"	eg) R<- !'a'+ 	>>>		R1 <- !R2	R2<- 'a'+"
			return nil, ctx.srcemsg( bstr, term.pos, term.data..s)
			 end 
		 end::_luka_LOOPNEXT::end 
	 end::_luka_LOOPNEXT::end 
	return rules, pos, subcnt
	--term categories at this point: ident, lit, class, act, plus any and eof (., !.)
 end

-- Check the tokens of one action block and pack them into a single "act"
-- term.
--   rtop:  head term of the rule the action belongs to (its data is the name)
--   rnum:  rule number, used for the info text only
--   tb:    array of the tokens found between "{" and "}"
--   nlist: follow-set table of ctx.bldrules (nlist[ctg] = allowed next ctg)
--   bstr:  raw source text for error messages
-- Returns a term { ctg="act", data=acts, pos=..., info=... }, or nil plus a
-- message when the token order is invalid.
-- acts is the linear statement list: "=" tokens are dropped, empty
-- statements (repeated or leading separators) are dropped, the data of an
-- aC token ("_12") is replaced by its number (this changes the token in
-- `tb` itself), every statement ends with a "sep" entry and the list ends
-- with one "esep" entry. A block that holds separators only yields an empty
-- list; the term then takes its position from the first token of `tb`.
function ctx.f_actrules(rtop, rnum, tb, nlist, bstr)
	local acts = {}
	local xctg = nlist["{"]
	local pending = false	-- true while the current statement has content
	-- tb is an ordered array, so it is walked in index order
	for _, v in ipairs(tb) do
		if xctg[v.ctg] == nil then
			return nil, ctx.srcemsg(bstr, v.pos, "bad actblk tokens")
		end
		xctg = nlist[v.ctg]
		if v.ctg == "lit" then xctg = nlist["alit"] end

		if v.ctg == "sep" then
			-- a separator is only kept when it closes a non-empty statement
			if pending then
				acts[#acts + 1] = v
				pending = false
			end
		elseif v.ctg ~= "=" then	-- the assignment sign carries no information
			if v.ctg == "aC" then
				-- strip the leading "_" and keep the number
				v.data = tonumber(string.sub(v.data, 2))
			end
			acts[#acts + 1] = v
			pending = true
		end
	end

	if #acts ~= 0 then
		-- close the last statement when the source did not
		if acts[#acts].ctg ~= "sep" then
			local sep = u.tdup(acts[#acts])
			sep.ctg = "sep"
			sep.info = "sep:;"
			acts[#acts + 1] = sep
		end
		-- end marker of the whole action
		local esep = u.tdup(acts[#acts])
		esep.ctg = "esep"
		esep.info = "esep"
		acts[#acts + 1] = esep
	end

	-- acts is empty for a block like "{ ; }": fall back to the first raw token
	-- so the position lookup cannot index nil
	local first = acts[1] or tb[1]
	return {
		ctg = "act",
		data = acts,
		pos = first and first.pos,
		info = "act: RULE " .. tostring(rnum) .. ": " .. rtop.data,
	}
end
-- Debug helper: print the rule / jump map to stderr.
-- Thin wrapper that calls ctx.frulesinfo with io.stderr as output handle
-- and returns whatever that call returns.
function ctx.rulesinfo(rules)
	local out = io.stderr
	return ctx.frulesinfo(out, rules)
end
-- Write a readable listing of `rules` to the handle `fh` via u.fprintf.
-- One line per rule: "<index>: " followed by "<ctg><info from its first
-- ':'> <pre><suf>, " for every term and a closing ":". When the last term of
-- a rule is an action, its entries follow on "act, ..." lines, with a line
-- break after every "sep" entry.
-- NUL bytes in the listing are replaced by the two characters "\0".
-- Returns the listing text, or nil plus a message when rules is nil.
function ctx.frulesinfo(fh, rules)
	if rules == nil then return nil, "rules is nil" end
	local out = {}
	local function put(piece) out[#out + 1] = piece end

	for idx = 1, #rules do
		local rule = rules[idx]
		put(idx .. ": ")
		for k = 1, #rule do
			local term = rule[k]
			local fix = (term.pre or "") .. (term.suf or "")
			local tail = string.match(term.info, ":.*")
			put(term.ctg .. tail .. " " .. fix .. ", ")
		end
		put(":\n")

		local last = rule[#rule]
		if last.ctg == "act" then
			local acts = last.data
			put("act, ")
			for j = 1, #acts do
				local entry = acts[j]
				put(entry.info .. ", ")
				if entry.ctg == "sep" then put(":\nact, ") end
			end
			-- the list ends with an esep entry, so the layout comes out even
			put(":\n")
		end
	end

	local res = string.gsub(table.concat(out), "%z", "\\0")
	u.fprintf(fh, "%s", res)
	return res
end


-- Flattens the nested rule tables into linear token lists intended for the C side.
-- Actions are split off into their own list, and the head arrays rhead/ahead
-- (rule number -> start position) are built along the way.
--   token.data : text of a lit/regex term
--   token.num  : byte length for lit/ere/bre, index of the target rule for ident;
--                its meaning depends on ctg
-- Author's design note (translated, not verifiable from this function): char
-- classes were to be stored as a 0/1 string in the otherwise free data field,
-- presumably handled where the data is converted to C.
--
-- Returns { rtokens=, atokens=, rhead=, ahead= }, or nil plus a message when
-- rules is nil or an ident term names a rule that does not exist.
function ctx.taperules(rules)
	if rules == nil then
		return nil, "rules is nil"
	end
	local rtokens, atokens = {}, {}
	-- slot 1 is pre-filled so both arrays are non-empty even without any rule
	local rhead, ahead = { 1 }, { 1 }

	-- temporary lookup used right below: rule name -> Lua index of the first
	-- rule that carries this name
	local first_index = {}
	for n = 1, #rules do
		local name = rules[n][1].data
		assert(name, "hasherr")
		if first_index[name] == nil then
			first_index[name] = n
		end
	end

	for n = 1, #rules do
		local rule = rules[n]
		rhead[n] = #rtokens + 1	-- Lua (1-based) position of this rule's first token
		ahead[n] = -1	-- stays -1 when the rule has no action
		for k = 1, #rule do
			local term = rule[k]
			local kind = term.ctg
			if kind == "act" then
				-- the action list is already linear; only normalise the field types
				ahead[n] = #atokens + 1
				for _, act in ipairs(term.data) do
					local tok = { ctg = act.ctg, data = "", num = 0 }
					if act.ctg == "lit" then
						tok.data = act.data
						tok.num = #act.data
					elseif act.ctg == "aC" then
						tok.num = act.data
					elseif act.ctg == "sep" then
						tok.num = -1
					elseif act.ctg == "esep" then
						tok.num = -2
					end
					tok.rnum = n
					tok.rname = "rule " .. tostring(n) .. ": " .. rule[1].data
					atokens[#atokens + 1] = tok
				end
			else
				-- num: string length for lit/ere/bre, rule index for ident, else 0.
				-- Author's note (translated): the length was once required for lit
				-- only; regex ended up reading the whole text, so in practice num
				-- matters for ident only.
				local num = 0
				if kind == "lit" or kind == "ere" or kind == "bre" then
					num = #term.data
				end
				if kind == "ident" then
					-- store the number of the leading rule with that name, not of a
					-- later body (author: used to retry after a failure)
					local target = first_index[term.data]
					if target == nil then
						local msg = u.fprintf(nil, ": bad peg. using nodef rule: rule %d %s: %d,%d >> %s", n, rule[1].data, n, k - 1, term.data)
						return nil, msg
					end
					num = target
				end
				rtokens[#rtokens + 1] = {
					ctg = kind,
					data = term.data,
					num = num,
					pre = term.pre or "",
					suf = term.suf or "",
				}
			end
		end
		-- every rule is closed by a separator token
		rtokens[#rtokens + 1] = { ctg = "sep", data = "", num = -1, pre = "", suf = "" }
	end

	-- terminators for both token lists and sentinels for both head arrays
	rtokens[#rtokens + 1] = { ctg = "esep", data = "", num = -9, pre = "", suf = "" }
	atokens[#atokens + 1] = { ctg = "esep", data = "", num = -9, rnum = 0, rname = "" }
	rhead[#rhead + 1] = -9
	ahead[#ahead + 1] = -9
	return { rtokens = rtokens, atokens = atokens, rhead = rhead, ahead = ahead }
end

-- Jump check: looks for infinite loops caused by rule references (ident) and by
-- repeated empty literals (''* / ''+). Anything else may still make progress.
-- Author's notes (translated):
--   * '*'-style terms are highly suspicious but are let through here; they are
--     left to the runtime check.
--   * Planned extra checks -- '!'/'&' combined with '*'/'?' (degenerates to ''
--     or !'') and the meaningless '!+' / '&+' -- are not performed in the code below.
--
--   rules : array of rules; rules[r][1].data is the rule name, later terms carry
--           ctg / data / pre / suf. Consecutive rules with the same name are
--           treated as alternatives ("subrules") of one rule.
-- Returns the trace log string (ending in "loop check ok") on success, or
-- nil plus the log with the error description appended on failure.
function ctx.loopck(rules)
	-- same guard (and message) as the sibling functions taking a rules table
	if rules == nil then  return nil, "rules is nil"  end
	-- walker state, also used for error recording:
	--   rpos/tpos : current rule / term position
	--   eres      : trace and error text
	--   looptb    : names of rules entered without any consumption seen so far
	--   jmpstk    : (rpos, tpos) pairs pushed on every ident jump
	local st = {rpos=1, tpos=1, eres="", looptb={}, jmpstk={}, rc=0 }
	local buf = nil
	st.eres = st.eres.."> -- rule 1 "..rules[st.rpos][1].data
	while 1 do
		local term = rules[st.rpos][st.tpos]
		-- term 1 is the rule name: remember that this rule has been entered
		if st.tpos==1 then  st.looptb[term.data]=1; goto lb_WALK  end
		if st.tpos>=2 and term.ctg~="ident" and term.pre==nil then
			-- an advancing element was found: clear the mark of the current rule
			st.looptb[rules[st.rpos][1].data] = nil
		end
		-- a repeated empty literal can never terminate. Regex terms cannot be
		-- judged before they reach the C side, so they are checked there.
		if (term.suf=="*" or term.suf=="+") and (term.ctg=="lit" and term.data=="") then
			st.eres = st.eres..
			u.fprintf(nil, "\nERR rule %d %s: %d,%d: bad peg. empty lit loop"
			, st.rpos, rules[st.rpos][1].data, st.rpos, st.tpos-1)
			return nil, st.eres
		end
		-- the actual check
		if term.ctg=="ident" then
			-- something was consumed before this ident: skip it, like a '*'/'?' term.
			-- (results are not reused, which is a little wasteful but left as is)
			if st.looptb[rules[st.rpos][1].data]==nil then goto lb_WALK end
			-- referenced rule is already on the no-consumption path: loop found
			if st.looptb[term.data] then
				local dst
				for i,v in ipairs(rules) do
					if v[1].data==term.data then dst=i; break end
				end
				st.eres = st.eres..
				u.fprintf(nil, ": bad peg. infinite ruleloop: rule %d %s: %d,%d >> rule %d %s"
				, st.rpos, rules[st.rpos][1].data, st.rpos, st.tpos-1, dst, rules[dst][1].data)
				return nil, st.eres
			end
			-- loop candidate, first jump into that rule
			st.jmpstk[#st.jmpstk+1] = st.rpos
			st.jmpstk[#st.jmpstk+1] = st.tpos
			local rname = rules[st.rpos][st.tpos].data
			-- reverse lookup name -> rule index; doubles as the undefined-rule check
			for i,v in ipairs(rules) do
				if v[1].data==rname then rname=i; break end
			end
			if type(rname)~="number" then
				st.eres = st.eres..
				u.fprintf(nil, ": bad peg. using nodef rule: rule %d %s: %d,%d >> %s"
				, st.rpos, rules[st.rpos][1].data, st.rpos, st.tpos-1, rname)
				return nil, st.eres
			end
			st.rpos = rname
			st.tpos = 1
			st.eres = st.eres.."\n> jmp rule "..tostring(st.rpos)..": "..rules[st.rpos][1].data
			goto lb_NEXT
		end
		-- an ident is mandatory even with a prefix; for everything else a prefixed
		-- term or an empty literal consumes nothing, so just walk on
		if term.pre~=nil or (term.ctg=="lit" and term.data=="") then  goto lb_WALK  end
	::lb_NOLP::
		-- input was probably consumed (jump/skip cases went to WALK or NEXT):
		-- no loop through here, resume after the rule the walk started from
		st.eres = st.eres.."\n> jmp stop "..tostring(st.rpos)
		if #st.jmpstk~=0 then  st.rpos = st.jmpstk[1]  end
		goto lb_RNEXT
	::lb_WALK::
		st.tpos = st.tpos+1
		if st.tpos <= #rules[st.rpos] then  goto lb_NEXT  end
		-- end of the rule reached: needs a decision, a jump may continue elsewhere
		goto lb_WALL
	::lb_WALL::
		-- while inside a jump the next alternative of the same rule must be examined
		if #st.jmpstk~=0 then
			-- FIX: the original compared rules[st.rpos+1][1].data with itself, which
			-- is always true (and indexes nil past the last rule). The next rule is
			-- an alternative only when it exists and carries the current rule's name.
			local nxt = rules[st.rpos+1]
			if nxt~=nil and nxt[1].data == rules[st.rpos][1].data then
				st.rpos = st.rpos+1
				st.tpos = 1
				st.eres = st.eres.."\n> subrule "..tostring(st.rpos)
				goto lb_NEXT
			end
			-- no further alternative: no loop on this path
			goto lb_NOLP
		end
		-- not inside a jump: plain end of rule, fine
		goto lb_RNEXT
	::lb_RNEXT::
		st.rpos = st.rpos+1
		if #rules<st.rpos then  break  end
		-- rule names starting with '%' end the check
		buf = rules[st.rpos][1].data
		buf = string.sub(buf, 1, 1)
		if buf=="%" then  break  end
		-- re-initialise for the next top-level rule
		st.tpos = 1
		st.looptb = {}
		st.jmpstk = {}
		st.eres = st.eres.."\n> -- rule "..tostring(st.rpos).." "..rules[st.rpos][1].data
	::lb_NEXT::
	end
	st.eres = st.eres.."\nloop check ok\n"
	return st.eres
end
-- (translated) 1st is the info string; 2nd is a flag table for the rules at risk of consuming nothing, e.g. r[10]=1 when rule 10 consists only of '*' terms


--[[
ID <- "123"	...1
ID <- "123"* "xyz"	...1,x
ID <- "123"+ "xyz"	...1
ID <- "123"? "xyz"	...1,x
ID <- !"123" "xyz"	...x   (x is mandatory; every ! term is ignored)
ID <- jmp	"xyz"	...copy the jmp target's list
ID <- jmp*	"xyz"	...jmp, x 
]]

-- Table -> C data conversion (tb2c).
-- Turns the plain Lua tables produced by ctx.taperules into LuaJIT FFI cdata.
--   obj : { rtokens=, atokens=, rhead=, ahead= }
-- Returns { rtokens, rhead, atokens, ahead, rsave }: the first four are cdata
-- arrays (rtoken_t[], int[], atoken_t[], int[]); rsave is a Lua array holding
-- obj plus every cdata object created here.
-- NOTE(review): rsave looks like the anchor that keeps the Lua strings behind
-- the const char* fields, and the cdata itself, reachable -- callers should
-- keep it for as long as the pointers are in use. Confirm against the consumer.
function ctx.cdatamaker(obj)
	local ffi = require("ffi")
	-- ffi.cdef raises when a struct is defined a second time, so each type is
	-- declared only if it is still unknown. This keeps the function callable
	-- more than once per process.
	if not pcall(ffi.typeof, "rtoken_t") then
ffi.cdef[[
typedef struct rtoken_tag {
	const char* ctg;
	const char* data;
	int num;
	const char* pre;
	const char* suf;
} rtoken_t;
]]
	end
	if not pcall(ffi.typeof, "atoken_t") then
ffi.cdef[[
typedef struct atoken_tag {
	const char* ctg;
	const char* data;
	int num;
	int rnum;
	const char* rname;
} atoken_t;
]]
	end
	-- four products (rtokens, atokens, rhead, ahead) plus rsave to hold on to them
	local rsave = {}
	rsave[#rsave+1] = obj
	-- rtokens: every token table has the basic field layout, so LuaJIT's table
	-- initialiser converts it to the struct directly
	local rtokens_c = {}
	for _, v in ipairs(obj.rtokens) do
		rtokens_c[#rtokens_c+1] = ffi.new("rtoken_t", v)
	end
	local rtokens = ffi.new("rtoken_t[?]", #rtokens_c, rtokens_c)
	rsave[#rsave+1] = rtokens_c
	rsave[#rsave+1] = rtokens
	-- rhead
	local rhead = ffi.new("int[?]", #obj.rhead, obj.rhead)
	rsave[#rsave+1] = rhead

	-- atokens
	local atokens_c = {}
	for _, v in ipairs(obj.atokens) do
		atokens_c[#atokens_c+1] = ffi.new("atoken_t", v)
	end
	local atokens = ffi.new("atoken_t[?]", #atokens_c, atokens_c)
	rsave[#rsave+1] = atokens_c
	rsave[#rsave+1] = atokens
	-- ahead
	local ahead = ffi.new("int[?]", #obj.ahead, obj.ahead)
	rsave[#rsave+1] = ahead

	local res = {
		rtokens=rtokens
		, rhead=rhead
		, atokens=atokens
		, ahead=ahead
		, rsave=rsave }
	return res
end

-- Runs the pipeline scan -> build rules -> loop check on the PEG source s and
-- returns the debug text: the rule listing, a newline, then the loop-check log.
-- A stage that reports failure (falsy first result) raises through assert with
-- that stage's second result as the message.
function ctx.ped_makeinfo(mode, s)
	local scanned, src = ctx.f_scanpeg(s, mode)
	assert(scanned, src)
	local rules, builderr = ctx.bldrules(src, scanned)
	assert(rules, builderr)
	local looplog, looperr = ctx.loopck(rules)
	assert(looplog, looperr)
	local listing = ctx.frulesinfo(nil, rules)
	return listing .. "\n" .. looplog
end

-- Converts a cdata object to its address as a plain Lua number
-- (cast to void*, then to uintptr_t, then tonumber).
function ctx.c2p(cdata)
	local raw = ffi.cast('void *', cdata)
	local addr = ffi.cast('uintptr_t', raw)
	return tonumber(addr)
end

-- (str, str) -> table
-- Compiles the PEG source rstr into the base data set for the parser.
--   mode : option letters; the author notes the four kinds "nNrR" are needed.
--          When it contains "d" the debug text (rule listing + loop log) is
--          produced as well.
--   rstr : PEG source text
-- Returns { rtokens, rhead, atokens, ahead, ruleinfo, rsave }: the first four
-- are addresses (Lua numbers from ctx.c2p) of the cdata arrays, ruleinfo is the
-- debug text or nil, rsave is the rsave list from ctx.cdatamaker.
-- Every failing stage raises through assert.
function ctx.ped_makebase(mode, rstr)
	local scanned, src = ctx.f_scanpeg(rstr, mode)
	assert(scanned, src)
	-- the builder is recursive, so its error message may carry position data
	local rules, builderr = ctx.bldrules(src, scanned)
	assert(rules, builderr)
	local looplog, looperr = ctx.loopck(rules)
	assert(looplog, looperr)

	local ruleinfo = nil
	if string.find(mode, "d") then
		-- the info text includes the loop-check log
		ruleinfo = ctx.frulesinfo(nil, rules) .. "\n" .. looplog
	end

	local tape, tapeerr = ctx.taperules(rules)
	assert(tape, tapeerr)
	-- These are the basic products: { rtokens, atokens, rhead, ahead }.
	-- Author's note (translated): running them through pvm would add functions,
	-- so the work stops just short of that.
	local cobj = ctx.cdatamaker(tape)
	local addr = ctx.c2p
	return {
		rtokens = addr(cobj.rtokens),
		rhead = addr(cobj.rhead),
		atokens = addr(cobj.atokens),
		ahead = addr(cobj.ahead),
		ruleinfo = ruleinfo,
		rsave = cobj.rsave,
	}
end


-- Returns the version and license banner as one string (no trailing newline).
function ctx.ped_version()
	return [=[
ped 2.1.0
Copyright (C) 2021 momi-g
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.]=]
end

-- Returns the short usage text (the detailed one is ctx.ped_Help).
function ctx.ped_help()
	return [=[
HowTo (ped, sed-style editer using peg)	
opt: -[e|f][n|N][r|R] ( -hHgVtTodEL: others. see ~$ ped -H|less ) 
 e/f(pegexpr/file), n/N(noout/NotALLOW if hit norule), r/R(expand syntax)
--
 eg) ~$ echo "12a"|ped -re 'ID<-[a-z]+ {_1="Z"}'  #-enf works like sed opt
 eg) ~$ ped -rf buf.peg < src.txt	# load rulefile (noBOM ascii/utf8 only)
 eg) ~$ ped -nrf buf.peg src.txt	# disp only hit rule

-- peg sample. support all orig peg syntax: '' "" <- / () [] .!&+*?#
	NAME <- 'alice'+  / . "ob" / ID		#bob rob 3ob etc... 
	ID <- ![0-9] [_a-zA-Z0-9]+		#varname etc
 ...see https://pdos.csail.mit.edu/papers/parsing:popl04.pdf

-- expansion(-r/R opt): editblk, ERE/BRE-reg, esc(\ooo), accept binary etc
 editblk: {} = ; "" '' _0 _1 _2..(blk/assign/sep/lit/field. nl works as sep)
 charesc: \[abntvrf\[]'"], \0-377, \u0000-\U0010FFFF(4/8 digits)
 reg: e/(a|b)+/, e"(a|b)+", b'[0-9][\u3042-\u3044]' etc. see -H

 eg) ~$ ped -re 'MYRULE <- "a" ("b"/"c") "d" {_1=_3 "\043" _2; _3="Z"}' 
	(in)acd >>> 'd#c' 'c' 'Z' >>> (out)d#ccZ (_0:concat all field str)
]=]
end

-- Returns the detailed help text (option reference, syntax notes, appendix and
-- the luka.ped sample). The text is assembled from two long strings because the
-- sample part itself contains "]=]" sequences.
-- The ERE prefix is shown as e'' / e"" / e// throughout: the regex prefix was
-- renamed from r to e (see the change log at the end of the file), and the -r
-- summary and one regex example still showed the old r form.
function ctx.ped_Help() 
	local buf=[=[
-- ped detail help
 ped [-e rule|-f rulefile] [OPT] file/stdin
 opt: [n|N] [r|R] [t|T] [others: hHgVodEL] 

 -e/f: set pegrule with -e:optargs / -f:file (ascii/noBOM utf8)
    eg) ~$ echo abc|ped -re 'R1<-"b" {_1="Z"}'  #>>  aZc  
    eg) ~$ echo abc|ped -rf myrule.peg

 -E: set parse target instead of file/stdin
    eg) ~$ ped -re 'R1<-"b" {_1="Z"}' -E 'abc'  #>>  aZc  
    eg) ~$ printf "abc"| ped -re 'R1<-"b" {_1="Z"}'	#>> (same)
    ...-E/stdin/ag1(file) is selective. 

 -n/N: noout/stop if hit norule
    eg) echo abc|ped -nre 'R1<-"a" {_1="Z"}'  #>>  Z
    eg) echo abc|ped -Ne 'R1<-"a"'  #>> stop. $?=1
    
    below commands work the same
    eg) echo abc |ped -nre 'R1<-"a"'  #>> a
    eg) echo abc |ped  -re 'R1 <- "a"	OTHER <- . {_0 = ""}'	#>> a

 -r: expand peg syntax. allow only orig-peg syntax if noset.
   editblk: {} = ; "" '' _0 _1 _2..(blk/assign/sep/lit/field. nl works as sep)
   regex  : ERE: e'', e"", e//, BRE: b'', b"", b//,
   herelit: h'', h""   h"\123\n" == "\\123\\n",  stop back slash working
   charesc: \0-377, \a, \u, \U etc
   BOF    : add anchor syntax !!. as BOF. sed anchor ^$ >> ped !!. !.

  --edit block
    peg is just a grammar/syntax rule like BNF and doesnt have
    action block as sed/awk/yacc. -r/R add edit block.
    
    eg)
    NAME <- [aA] 'lice'	{_1='z'; _2="xx" _1}   #alice >> z + xxz >> zxxz
    NAME <- [aA] 'lice'	{_0 = 'bob\012' _2 }   #Alice >> bob(\n)lice
    ~$ echo "abd" |ped -re 'R1 <- "a" ("b"/"c") "d" {_1=_3 "AB" _2;_3="Z"}' 
    ... _1="a", _2=("b"/"c"), _3="d", _0 = all	>>>d AB b b Z
    
    edit block syntax:
      {}: edit block
      = : assign
      _1: field. same as sh $1,$2. select grp allstr if use to grouping()
      _0: allstr of the rule as awk $0. other flds become undef if edit _0
      '': lit. allow charesc \u0010, \n etc. accept h'...'
      "": lit. accept h"..."
      ; : separator(ignore). editblk is free format so just improve readability
      (space/tab/newline): same as separator

  --regex
    add ERE: e'', e"", e// or BRE: b'', b"", b//. regex supports posix
    ERE/BRE with the following.  

			(guideline)
     - resemble 'posix sed -E/sed' syntax as much as possible
     - unclear syntax causes error (\191 != \1[9][1], raise err)
     - plz use ERE and use BRE only when you need backref \1-9

			(expantion)
     - .(any)/[] includes '\0' and doesnt hold binary. locale charactor only.
     - accept charesc \ooo, \n, \u, \U etc. see the below charesc.
     - \ooo is used as bin in ERE []: [\316\243]=(\316|\243) != [Σ]==[\u03a3]
     - \ooo cant use in BRE []: [\316] != [136\], raise err.(BRE misses '|' op)
     - use system locale setting (see ~$ locale, maybe utf8 etc )
	
			(restriction)
     - regex tests only the head of the input, work as tokenizer like lex.
     - eos anchor '^' works but get the same result: e/^abc/ == e/abc/
     - needs esc blockchar outside of []: e"/" == e/\// == e/[/]/
     - eol anchor '$' raises error. use peg-eof '!.': e/abc$/ >>> e/abc/ !.
	
       eg) R1 <- ![0-9] [_a-zA-Z0-9]+
           R1 <- e'[_a-zA-Z][a-zA-Z0-9]*'	#works the same except binary
       
       eg) R1<- e'\u0061' e'[\u0061-\u0062]' "\u0061"
           R1<- e'a' e/[a-b]/ 'a'
           R1<- e'a' e/[ab]/ 'a'
       
       eg) R1<- "\1" b'\(ab\)\10\1'   #>> "\001" "ab" "\010" "ab"  bkref:\1-\9
       eg) R1<- b'\(ab\)\1[\1]'	#>> "ab" "ab" "\" or "1" :posix rule
       eg) R1<- e'[\a]'	#>> "\a" == "\7" == "\007"
       eg) R1<- e'[a\]'	#>> e/(a|[\])/, posix rule

    posix-regex cant use unicode/binary charesc, but -r/R mode regex accept
    them using escape syntax as follows.
     
     - charesc \abntvrf works
     - \ooo,\uU work as bin/unicode char itself(doesnt work as special)
     - out of reg-class[] allows all charesc except BRE backref \1-9
     - reg-class cant mix charesc. [\oct], [\uni], [posix+\abntvrf]
    
    	eg) e'/\n[\t\n]/'	== e'/\012(\011|\012)'	...newline etc
    	eg) e'/ab\101\u0041/'	== e'/abAA/'
    	eg) e'/\a\134/' == e'/\007\\/' == e'/\a[\]/'	..bin itself
    	eg) e'/a\u0028/' == e'/a\(/' == e'/a[(]/'
    	eg) b'/\(a\)b\1\01/' == b'/\(a\)b\1\001/'   ..\1 is backref
    	eg) e'/[\1-\02\10]/' == e'/(\001|\002|\010)/'	...ok
    	    e'/[abc\n\t[:alnum:]]/' ...ok, posix+\a...
    	    e'/[\u0041-\U00000042\u0043]/' == ([A-B]|[C])	...ok
    	    e'/[\012\u0010]/'	...NG	mix \ooo, \u
    	    e'/[a\012]/'	...NG	mix \ooo, posix+\a..
    	    e'/[\n\012]/'	...NG
    	    e'/[a\u0012]/'	...NG
    	    e'/[^\u0012]/'	...ok, hat(op 'not') works 
    	
  --charesc
    orig-peg allows only ascii-env(\0-\277) and limited charesc(\a is invalid).
    -r/R expand ascii to byte-oriented(\0-377) and add c99 charesc.
        peg-class: [] 1char   >>> 1byte
        peg-any: .(dot) 1char >>> 1byte

    charesc rule will be 'orig-peg(base)+c99' or 'regex(base)+c99'

                      (no -r/R mode and c99)    
        peg       ERE(out[])      ERE(in[])         c99
    \[nrt[]\'"]   \[.[$()|*+?\{]     -          \[abntvrf?\"']	
     \0-\277             -           -             \0-377 
         -               -           -            \x0-\xff
         -               -           -      \u0000-\U0010ffff..u4/U8
   
                        (-r/R mode)
        peg                  ERE(out[])             ERE(in[])      
   \[nrt[]\'"abntvrf]  \[.[$()|*+?\{abntvrf]       \[abntvrf]
        \0-\377               \0-\377                \0-\377         
        -nohex-               -nohex-                -nohex-        
  u4/U8(pegcls cant use)       u4/U8                  u4/U8          

   ...basic charesc working in -r/R mode are the belows
    - \xHH doesnt work
    - regex-class e/[]/ restricts some charesc, see above regex section
    - \abntvrf work everywhare
    - \0-\377 work everywhare
    - unicode \u4, \U8 work evrywhere except peg-class (byte oriented)
    - \ooo and \uU never works as special chars, \101==A, \134 == \\ 
    - others depend on where they belong: peg-class [\[] == reg-class [[]
  
  eg)
      id <- 'abc\[\"\'\u0041'  >>>  abc["'A ...peg-lit
      id <- [\n\101\a]   >>> [\012A\007] 	...peg-class(1byte)
      id <- [\u0041]     >>> invalid 		...peg-class
      id <- [\101-\103]    >>> [ABC]		...peg-class(range)
      id <- [\136\055\101] >>> [^-A] == [-A^] 	...peg-class(3chars)
      (..peg-class is very similar to regex-class, but differ in detailis)
      
      id <- e'\u0041[\136\101-\103]'  >>> e'A(^|A|B|C)'	...ERE(in[])
      id <- e'[\u0041-\u0043]' >>> e'[A-C]'	...ERE(in[])
      id <- e'[\u005e\0041]' >>> e'([^]|[A])'	...ERE(in[])
      id <- e/[\n\u0041]/ >>> invalid, mixed charesc
      id <- e/\n[\[a]/  >>> e/\134([\]|[[]|a)/	...ERE(in[])
      (..add c99 to ERE-regex. see regex section)

  --BOF
	add syntax '!!.' as begining of file. dont make space, '! ! .' etc.
	ped BOF '!!.' and EOF '!.' corresponds to sed anchor '^' and '$'
	  eg) printf "abc \n 123" | sed -e 's/^/@/g'	#>> @abc @123
	  eg) printf "abc \n 123" | ped -re 'R <- (!!./'\n') {_0=_0 '@' }

 -R: same as -r but regex doesnt use system locale.
  opt "-r" checks system locale amd use it automatically.
  opt "-R" skips system locale check (maybe use "C" locale)
  if you dont use regex, -r and -R will behave the same.
    
    ~$ locale	#>> LC_CTYPE == lang.UTF-8
    ~$ echo "Σ" | ped -re 'RULE <- e/./ {_0 = "Z"}'	#>> Z
    ~$ echo "Σ" | ped -Re 'RULE <- e/./ {_0 = "Z"}'	#>> ZZ
    
    ~$ locale	#>> LC_CTYPE==lang (system unsupports multibyte locale)
    ~$ echo "Σ" | ped -re 'RULE <- e/./ {_0 = "Z"}'	#>> ZZ
    ~$ echo "Σ" | ped -Re 'RULE <- e/./ {_0 = "Z"}'	#>> ZZ
    	
    posix .(any)/[] doesnt use 'one byte' but 'one charactor' so system
    locale setting affects to multibyte chars handling.  
    https://www.gnu.org/software/sed/manual/html_node/Locale-Considerations.html
    
    I recommend you to use -r opt basically. use -R when you need:
      - parse binary data
      - needs regex absolutely
      - the env isnt fixed but want to make the pedrule portable

 -t: output concrete syntax tree(CST) with ascii text

    ~$ echo abc | ped -tre 'R1 <- "a"'
    >>>
     # 1 OP RULE 1 0 R1
     # 1 OP FIELD 1 1
     \142
     # 1 C FIELD 1 1
     # 1 C RULE 1 0 R1
       ...
    info fmt: # (depth) (open/close) (rule/fld) (rulenum) (fldnum) [rulename]
    data fmt: \ooo (octet 3 digit, \042 etc)
      - if you set an edit block, the nested inside ruleinfo will be lost
      - internal ruledata may be displayed (%1, %R_OPT etc. see -d opt)
    
 -T: same as -t but parser doesnt edit. it may be useful when creating your
    own syntax tree.
	 
 -o: output to a file instead of stdout
	eg) ~$ ped -rf buf.peg src.txt		#>> write to stdout 
	eg) ~$ ped -rf buf.peg src.txt -o dst.txt 	#>> write to dst.txt 

 -d: disp pegrule debuginfo. return 1 ($?==1) if pegrule is invalid.
 	eg) ~$ ped -df buf.peg	#>> $? == 0 if pegrule is valid

 -L: newline str, \r\n, \r, \0 etc. this opt only uses for parse emsg.
    this opt never affects to parse result. accept c99 charesc syntax.
    use dfl:'\n' if noset
    eg) ~$ ped -rL 'ab' -f buf.peg src.txt  #>> use 'ab' as line separator 
    eg) ~$ ped -rL '\141\142' -f buf.peg src.txt  #>> the same result
    eg) ~$ ped -rL '\r\u000a' -f buf.peg src.txt  #>> '\r\n'
 
 -h: disp help
 -H: disp detail Help
 -g: ignore. this option do nothing
 -V: version info

-- appendix
 - pegrule(orig) ..https://pdos.csail.mit.edu/papers/parsing:popl04.pdf
   ped supports all orig pegrule. orig/ped is freeformat.
 
 '' : lit	eg) 'abc', 'a\143c'
 "" : lit
 <- : rule define	eg) RULE_HW <- 'hello' 
  / : rule def 'OR'	eg) RULE_HW <- 'hello' / 'hi'
 () : grouping  	eg) NAME <- ('bo' / 'bom') 'b'	# bob,bomb
 [] : char class	eg) NAME <- [a-cA] 'lice'   # alice,blice,clice,Alice
  . : any 1 char	eg) NAME <- . 'lice'	# xlice, ylice, zlice...	
  + : one or more	eg) NAME <- 'ab'+	# ab, abab, ababab...
  * : zero or more	eg) NAME <- 'a' 'b'* 'c'   # ac, abc, abbc.. (danger)
  ? : zero or one	eg) NAME <- 'a' 'b'? 'c'   # ac, abc (danger)
  ! : not/except	eg) NAME <- !'A' . 'lice'  # similar to regex [^A]lice	
  & : and/include	eg) NAME <- &'ab' [a-z]+   # abzz, ababc, abx ...
  # : linecomment	eg) NAME <- 'abc'  # cmt skip until newline, \r\n,\n,\r
 !. : not+any==EOF	eg) END  <- '\n' !.	# similar to sed EOL '$'	 	
 
 class[] is similar to regex, but 'NOTsymbol' [^] doesnt work, needs esc [\]]. 
 '*?' is danger symbol. peg is recursive descent parsing so the below rule
 is valid grammar but causes infinite loop.
	RULE1 <- '' / 'abc'
	RULE2 <- 'abc'*
	RULE3 <- 'abc'?
 be careful when '*?' is at the top of the rule. ped raises error if infinite
 ruleloop exists. you can also ckeck ruleloop if set -d opt.

 - run orig peg
   ped works fine under the orig syntax. it will run as a grammar checker. 
	~$ echo xyz | ped -e  'R1 <- "abc"'	#>> xyz,      $?=0
	~$ echo xyz | ped -ne 'R1 <- "abc"'	#>> (nodisp), $?=0
	~$ echo xyz | ped -Ne 'R1 <- "abc"'	#>> (errstop) $?=1

 - bench mark: 1cpu 2.8GHz
	~$ time cat 1Mb.txt| sed -e 's@[_a-zA-Z][_0-9a-zA-Z]*@X@g'
	~$ time cat 1Mb.txt| ped -re 'ID<-![0-9] [_0-9a-zA-Z]+ {_0="X"}'
	>>>
	 sed: real 0m0.517s
	 ped: real 0m0.618s
	...130-150ms to convert 1000 lines (in ped self-hosting) 

 - literal rule,  "123\"abc" etc
    ERE: ["](\\"|[^"])*["]

	ped:
	LIT <- DQ (!DQ .|ESC_DQ)* DQ 
	DQ <- '"'
	ESC_DQ <- h'\"'		# or '\134\042'

	LIT2 <- DQ e'(\\"|[^"])*' DQ		# text input only 
	LIT3 <- DQ e'(\\"|[^\u0022])*' DQ	# same 
	LIT4 <- DQ e'(\\"|[^\042])*' DQ	  # allow binary input "a(\377)b" etc
 
 - search cmt
   (delcmt.ped)
     LINECMT <- '//' (!'\n' .)* '\n'	{_0 = "KILL_L " }
     MULTICMT <- '/@' !'@/' .* '@/'  {_0 = "KILL_M " } #.(dot) == 1byte
     
     # easy+fast using ped-regex
     # L <- e'//[^\n]\n' {_0="KILL_L"}
     # M <- e'/@(@[^/]|[^@])*@/' {_0="KILL_M"}
     
   (src.txt)
     abc //cmt
     xyz /@ cmt //abc
       hello, world @/
   ~$ ped -f delcmt.peg<src.txt		#>> abc KILL_L xyz KILL_M

 - ped concept
    - sed with more powerful grammer expression
    - respect the orig and standard. avoid vendor lock-in syntax as PCRE
    - portable
    - easy to use. low learning cost

 - other sample (ped self-hosting C >> luajit, ~$ ped -rf luka.ped src.txt)
]=]..[=====[

# luka.ped, transpiler C-syn to lj 
#--main-rules
stmt <- 
		#skip multibyte-terms
	BLANK	
	/ LITS
	/ CMT
		#edit statement
	/ blk_stmt	# loop etc: for(i=1,10){..} >> for i=1,10 do .. end
	/ RB_DFL	#if( a=(1+2) ), nest logic, !")" stmt
	/ CB_DFL
		#edit terms
	/ M_TERM
		#pass oters
	/ IDENT		#get longbyte using regex to same jmpcost
	/ .		#all 1 byte
EOF <- !.
#--main-rules-end

#--scanner

		#add myrule-syntax
M_TERM <- ":=" {_0 = "="}
	/ "**" {_0 = "^"}
	/ "!=" {_0 = "~="}
	/ "!"  {_0 = " not "}
	/ "&&" {_0 = " and "}
	/ "||" {_0 = " or "}
	/ "break" ! [.a-zA-Z_]		{_1 = " do break end "}
	/ "continue" ! [.a-zA-Z_]  {_1 = " goto _luka_LOOPNEXT "} #for/while
	/ "lo"	! [.a-zA-Z_]		{_1="local"}
	/ ";\n" {_0="\n"}		# for astyle etc. del last semi-colon

		#blank
BLANK <- (SPACE/TAB/NL)+
SPACE <- " "
TAB <- "\t"
NL <- "\r\n" / "\n" / EOF

		#ident	nohit a["b"].val etc. uses only for jmpcost + func_stmt
IDENT <- e"([a-zA-Z_][a-zA-Z0-9_]*)([.][a-zA-Z_][a-zA-Z0-9_]*)*"  #aa.bb.cc

		#cmt 
CMT <- MCMT / LCMT
LCMT <- e"--[^\n]*" "--" (! NL .)* NL
	 / "//"  (! NL .)* NL {_1="--"}

		#add for C-cmtstyle...longstr comes 1st. 
MCMT <- "--" MLIT		#MLIT [[...]], MCMT --[[...]]
	/ "/*---" (!"---*/" .)*  "---*/" {_1="--[===["; _3="]===]"}
	/ "/*--" (!"--*/" .)*  "--*/" {_1="--[==["; _3="]==]"}
	/ "/*-"  (!"-*/" .)* "-*/" {_1="--[=["; _3="]=]"}
	/ "/*" (!"*/" .)* "*/"	{_0="--[[" _2 "]]"}

		#lit
LITS <-	e/"([\].|[^\"])*"/
	/	e/'([\].|[^\'])*'/
	/ MLIT		#here-lit == multiline-lit

MLIT <- "[[" (!"]]" .)* "]]"
	/ "[=[" (!"]=]" .)* "]=]"
	/ "[==[" (!"]==]" .)* "]==]"
	/ "[===[" (!"]===]" .)* "]===]"
	/ "[====[" (!"]====]" .)* "]====]"
	/ "[=====[" (!"]====\075]" .)* "]====\075]"
	/ "[" "="+	{_E}		# stop if more than =6
		#add C-style mlit
	/ "/*===" (!"===*/" .)*  "===*/"	{_0 ="[===[" _2 "]===]"}
	/ "/*==" (!"==*/" .)*  "==*/"		{_0 = "[==[" _2 "]==]"}
	/ "/*="  (!"=*/" .)* "=*/"			{_0 =  "[=[" _2 "]=]"}

		#block_stmt...very complex:  for(){}, if(){}, elif(){}
blk_stmt <- ("for"/"while") RB_LP CB_LP
			{_3 = "do do" _3 "end::_luka_LOOPNEXT::end "}
	/ if_stmt
	/ func_stmt
	/ ";{" CB_LP { _0 = "do" _0 "end "}		# ;{...} >> do ... end

		# if_stmt	if(){..} >> (else)if .. then .. end
if_stmt <- BLANK? "if" RB_LP CB_LP ELIF_BLK* EL_BLK?
		{_3 = _3 "then"; _0 = _0 "end "}
ELIF_BLK <- BLANK? ELIF_WORD RB_LP CB_LP { _3= _3 "then" }
EL_BLK <- BLANK? "else" CB_LP
ELIF_WORD <- "elseif"
		/ "elif" {_0="elseif"}

		#uses only while,for,if etc		for(a) >> for a 
RB_LP <- BLANK? "(" ( !")" stmt)* ")" {_0 = " " _3 " "}
CB_LP <- BLANK? "{" ( !"}" stmt)* "}" {_0= " " _3 " "}

		#normal blk, if( (1+2) ) >> if (1+2) : needs ERB_DFL in stmt
RB_DFL <- BLANK? "(" ( !")" stmt)* ")"
CB_DFL <- BLANK? "{" ( !"}" stmt)* "}"

		#fn_def:  fn a.b(){}, a=fn(){}, 	edit {} >> ...end
func_stmt <- FN_WORD BLANK? IDENT? RB_DFL CB_LP	{_0 = _0 "end"}
FN_WORD <- "function"
		/ "fn"	{_0="function"}
]=====]
	return buf
 end

--[[SH_SMP
local u = require("ped_ljmod")
print( u.ped_help() )
//SH_SMPE]]

-- When u.ismain() reports true (presumably: this file is executed directly
-- rather than required -- confirm in util), print the detailed help.
-- In every case the module table ctx is the chunk's return value.
if u.ismain() then
	-- the converted argument list is collected but not used yet
	local _ = { u.tb2va(_G.arg) }
	local text = ctx.ped_Help()
	print(text)
end
return ctx



--[[
 change log
 --
2021-08-15  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (usage_H):  fix doc

2021-08-11  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (f_scanpeg): change reg r"" >> e"", b""
	* (other): fix scan \[ and other charesc, reg syntax, reg esc

2021-07-26  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (frulesinfo): replace infostr \0 >> '\\0' for disp msg
	
	* ped_ljmod.sh.lua (scanpeg): fix reg-octesc 'hat' ".[$(..." >> "^.[$(..."

2021-07-21  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (loopck): change loopck routine, add -2, fillrpos etc

2021-07-15  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (scanpeg): add heredoc h""/h''

2021-07-10  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (loopck): add undef rule using emsg

2021-07-05  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (ped_version): apply license info. v2.0.0

2021-07-01  Momi-g	<dmy@dmy.dmy>

	* ped_ljmod.sh.lua (uniclass): fix [\u005e-\u005f] >> [^-_] logic, [_-_^]
	* (usage): fix doc
	
]]
