/// Languages and markup formats that have a dedicated separator list.
///
/// This is a field-less enum, so it derives the usual value-type traits:
/// `Copy`/`Clone` let a value be passed by value without being consumed,
/// `PartialEq`/`Eq`/`Hash` allow comparisons and use as a map or set key,
/// and `Debug` makes values printable in diagnostics and test failures.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum SupportedTextSplitterLanguage {
    /// C++ source code.
    Cpp,
    /// Go source code.
    Go,
    /// Java source code.
    Java,
    /// JavaScript source code.
    Js,
    /// PHP source code.
    Php,
    /// Protocol Buffers definitions.
    Proto,
    /// Python source code.
    Python,
    /// reStructuredText documents.
    Rst,
    /// Ruby source code.
    Ruby,
    /// Rust source code.
    Rust,
    /// Scala source code.
    Scala,
    /// Swift source code.
    Swift,
    /// Markdown documents.
    Markdown,
    /// LaTeX documents.
    Latex,
    /// HTML documents.
    Html,
    /// Solidity source code.
    Sol,
}
/// Returns the ordered list of separator strings for `language`.
///
/// Each list starts with language-specific markers (a newline followed by a
/// keyword, heading marker, tag, ...) and ends with generic fallbacks. For
/// every language except `Html` the fallbacks are `"\n\n"`, `"\n"`, `" "`
/// and `""`; `Html` ends with only `" "` and `""`.
///
/// The order is significant to callers: entries are listed from the most
/// specific to the least specific (presumably so a splitter can try them in
/// turn; the consumer is not visible here). Every separator appears at most
/// once per list; a repeated entry would add nothing to an ordered
/// preference list.
///
/// A fresh `Vec` is allocated on every call; the string slices themselves are
/// `'static` literals.
fn get_separators_for_language(language: SupportedTextSplitterLanguage) -> Vec<&'static str> {
    match language {
        SupportedTextSplitterLanguage::Cpp => vec![
            // Class and function-like declarations
            "\nclass ", "\nvoid ", "\nint ", "\nfloat ", "\ndouble ",
            // Control flow
            "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Go => vec![
            // Top-level declarations
            "\nfunc ", "\nvar ", "\nconst ", "\ntype ",
            // Control flow
            "\nif ", "\nfor ", "\nswitch ", "\ncase ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Java => vec![
            // Class and member declarations
            "\nclass ", "\npublic ", "\nprotected ", "\nprivate ", "\nstatic ",
            // Control flow
            "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Js => vec![
            // Declarations
            "\nfunction ", "\nconst ", "\nlet ", "\nvar ", "\nclass ",
            // Control flow
            "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ", "\ndefault ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Php => vec![
            // Declarations
            "\nfunction ", "\nclass ",
            // Control flow
            "\nif ", "\nforeach ", "\nwhile ", "\ndo ", "\nswitch ", "\ncase ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Proto => vec![
            // Top-level statements
            "\nmessage ", "\nservice ", "\nenum ", "\noption ", "\nimport ", "\nsyntax ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Python => vec![
            // Class and function definitions, including tab-indented `def`
            "\nclass ", "\ndef ", "\n\tdef ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Rst => vec![
            // Rule-like lines and directives
            "\n===\n", "\n---\n", "\n***\n", "\n.. ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Ruby => vec![
            // Declarations
            "\ndef ", "\nclass ",
            // Control flow
            "\nif ", "\nunless ", "\nwhile ", "\nfor ", "\ndo ", "\nbegin ", "\nrescue ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Rust => vec![
            // Declarations
            "\nfn ", "\nconst ", "\nlet ",
            // Control flow ("\nconst " is already listed above and is not repeated here)
            "\nif ", "\nwhile ", "\nfor ", "\nloop ", "\nmatch ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Scala => vec![
            // Type-level declarations
            "\nclass ", "\nobject ",
            // Member declarations
            "\ndef ", "\nval ", "\nvar ",
            // Control flow
            "\nif ", "\nfor ", "\nwhile ", "\nmatch ", "\ncase ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Swift => vec![
            // Declarations
            "\nfunc ", "\nclass ", "\nstruct ", "\nenum ",
            // Control flow
            "\nif ", "\nfor ", "\nwhile ", "\ndo ", "\nswitch ", "\ncase ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Markdown => vec![
            // Headings, level 2 through 6
            "\n## ", "\n### ", "\n#### ", "\n##### ", "\n###### ",
            // End of a fenced code block and horizontal rules
            "```\n\n", "\n\n***\n\n", "\n\n---\n\n", "\n\n___\n\n",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Latex => vec![
            // Sectioning commands
            "\n\\chapter{", "\n\\section{", "\n\\subsection{", "\n\\subsubsection{",
            // Environments
            "\n\\begin{enumerate}", "\n\\begin{itemize}", "\n\\begin{description}",
            "\n\\begin{list}", "\n\\begin{quote}", "\n\\begin{quotation}",
            "\n\\begin{verse}", "\n\\begin{verbatim}",
            // Math
            "\n\\begin{align}", "$$", "$",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
        SupportedTextSplitterLanguage::Html => vec![
            // Body-level tags
            "<body>", "<div>", "<p>", "<br>", "<li>", "<h1>", "<h2>", "<h3>", "<h4>", "<h5>",
            "<h6>", "<span>", "<table>", "<tr>", "<td>", "<th>", "<ul>", "<ol>", "<header>",
            "<footer>", "<nav>",
            // Head-level tags
            "<head>", "<style>", "<script>", "<meta>", "<title>",
            // Generic fallbacks (no newline-based entries for HTML)
            " ", "",
        ],
        SupportedTextSplitterLanguage::Sol => vec![
            // Compiler directives
            "\npragma ", "\nusing ",
            // Contract-level definitions
            "\ncontract ", "\ninterface ", "\nlibrary ",
            // Member definitions
            "\nconstructor ", "\ntype ", "\nfunction ", "\nevent ", "\nmodifier ", "\nerror ",
            "\nstruct ", "\nenum ",
            // Control flow
            "\nif ", "\nfor ", "\nwhile ", "\ndo while ", "\nassembly ",
            // Generic fallbacks
            "\n\n", "\n", " ", "",
        ],
    }
}
/// Demo entry point: looks up the separator list for Rust and writes every
/// entry to standard output, each followed by a newline.
fn main() {
    get_separators_for_language(SupportedTextSplitterLanguage::Rust)
        .into_iter()
        .for_each(|entry| println!("{}", entry));
}
ZW51bSBTdXBwb3J0ZWRUZXh0U3BsaXR0ZXJMYW5ndWFnZSB7CiAgICBDcHAsCiAgICBHbywKICAgIEphdmEsCiAgICBKcywKICAgIFBocCwKICAgIFByb3RvLAogICAgUHl0aG9uLAogICAgUnN0LAogICAgUnVieSwKICAgIFJ1c3QsCiAgICBTY2FsYSwKICAgIFN3aWZ0LAogICAgTWFya2Rvd24sCiAgICBMYXRleCwKICAgIEh0bWwsCiAgICBTb2wsCn0KCmZuIGdldF9zZXBhcmF0b3JzX2Zvcl9sYW5ndWFnZShsYW5ndWFnZTogU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2UpIC0+IFZlYzwmJ3N0YXRpYyBzdHI+IHsKICAgIG1hdGNoIGxhbmd1YWdlIHsKICAgICAgICBTdXBwb3J0ZWRUZXh0U3BsaXR0ZXJMYW5ndWFnZTo6Q3BwID0+IHZlYyFbCiAgICAgICAgICAgICJcbmNsYXNzICIsICJcbnZvaWQgIiwgIlxuaW50ICIsICJcbmZsb2F0ICIsICJcbmRvdWJsZSAiLAogICAgICAgICAgICAiXG5pZiAiLCAiXG5mb3IgIiwgIlxud2hpbGUgIiwgIlxuc3dpdGNoICIsICJcbmNhc2UgIiwKICAgICAgICAgICAgIlxuXG4iLCAiXG4iLCAiICIsICIiLAogICAgICAgIF0sCiAgICAgICAgU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2U6OkdvID0+IHZlYyFbCiAgICAgICAgICAgICJcbmZ1bmMgIiwgIlxudmFyICIsICJcbmNvbnN0ICIsICJcbnR5cGUgIiwKICAgICAgICAgICAgIlxuaWYgIiwgIlxuZm9yICIsICJcbnN3aXRjaCAiLCAiXG5jYXNlICIsCiAgICAgICAgICAgICJcblxuIiwgIlxuIiwgIiAiLCAiIiwKICAgICAgICBdLAogICAgICAgIFN1cHBvcnRlZFRleHRTcGxpdHRlckxhbmd1YWdlOjpKYXZhID0+IHZlYyFbCiAgICAgICAgICAgICJcbmNsYXNzICIsICJcbnB1YmxpYyAiLCAiXG5wcm90ZWN0ZWQgIiwgIlxucHJpdmF0ZSAiLCAiXG5zdGF0aWMgIiwKICAgICAgICAgICAgIlxuaWYgIiwgIlxuZm9yICIsICJcbndoaWxlICIsICJcbnN3aXRjaCAiLCAiXG5jYXNlICIsCiAgICAgICAgICAgICJcblxuIiwgIlxuIiwgIiAiLCAiIiwKICAgICAgICBdLAogICAgICAgIFN1cHBvcnRlZFRleHRTcGxpdHRlckxhbmd1YWdlOjpKcyA9PiB2ZWMhWwogICAgICAgICAgICAiXG5mdW5jdGlvbiAiLCAiXG5jb25zdCAiLCAiXG5sZXQgIiwgIlxudmFyICIsICJcbmNsYXNzICIsCiAgICAgICAgICAgICJcbmlmICIsICJcbmZvciAiLCAiXG53aGlsZSAiLCAiXG5zd2l0Y2ggIiwgIlxuY2FzZSAiLCAiXG5kZWZhdWx0ICIsCiAgICAgICAgICAgICJcblxuIiwgIlxuIiwgIiAiLCAiIiwKICAgICAgICBdLAogICAgICAgIFN1cHBvcnRlZFRleHRTcGxpdHRlckxhbmd1YWdlOjpQaHAgPT4gdmVjIVsKICAgICAgICAgICAgIlxuZnVuY3Rpb24gIiwgIlxuY2xhc3MgIiwKICAgICAgICAgICAgIlxuaWYgIiwgIlxuZm9yZWFjaCAiLCAiXG53aGlsZSAiLCAiXG5kbyAiLCAiXG5zd2l0Y2ggIiwgIlxuY2FzZSAiLAogICAgICAgICAgICAiXG5cbiIsICJcbiIsICIgIiwgIiIsCiAgICAgICAgXSwKICAgICAgICBTdXBwb3J0ZWRUZXh0U3BsaXR0
ZXJMYW5ndWFnZTo6UHJvdG8gPT4gdmVjIVsKICAgICAgICAgICAgIlxubWVzc2FnZSAiLCAiXG5zZXJ2aWNlICIsICJcbmVudW0gIiwgIlxub3B0aW9uICIsICJcbmltcG9ydCAiLCAiXG5zeW50YXggIiwKICAgICAgICAgICAgIlxuXG4iLCAiXG4iLCAiICIsICIiLAogICAgICAgIF0sCiAgICAgICAgU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2U6OlB5dGhvbiA9PiB2ZWMhWwogICAgICAgICAgICAiXG5jbGFzcyAiLCAiXG5kZWYgIiwgIlxuXHRkZWYgIiwKICAgICAgICAgICAgIlxuXG4iLCAiXG4iLCAiICIsICIiLAogICAgICAgIF0sCiAgICAgICAgU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2U6OlJzdCA9PiB2ZWMhWwogICAgICAgICAgICAiXG49PT1cbiIsICJcbi0tLVxuIiwgIlxuKioqXG4iLCAiXG4uLiAiLAogICAgICAgICAgICAiXG5cbiIsICJcbiIsICIgIiwgIiIsCiAgICAgICAgXSwKICAgICAgICBTdXBwb3J0ZWRUZXh0U3BsaXR0ZXJMYW5ndWFnZTo6UnVieSA9PiB2ZWMhWwogICAgICAgICAgICAiXG5kZWYgIiwgIlxuY2xhc3MgIiwKICAgICAgICAgICAgIlxuaWYgIiwgIlxudW5sZXNzICIsICJcbndoaWxlICIsICJcbmZvciAiLCAiXG5kbyAiLCAiXG5iZWdpbiAiLCAiXG5yZXNjdWUgIiwKICAgICAgICAgICAgIlxuXG4iLCAiXG4iLCAiICIsICIiLAogICAgICAgIF0sCiAgICAgICAgU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2U6OlJ1c3QgPT4gdmVjIVsKICAgICAgICAgICAgIlxuZm4gIiwgIlxuY29uc3QgIiwgIlxubGV0ICIsCiAgICAgICAgICAgICJcbmlmICIsICJcbndoaWxlICIsICJcbmZvciAiLCAiXG5sb29wICIsICJcbm1hdGNoICIsICJcbmNvbnN0ICIsCiAgICAgICAgICAgICJcblxuIiwgIlxuIiwgIiAiLCAiIiwKICAgICAgICBdLAogICAgICAgIFN1cHBvcnRlZFRleHRTcGxpdHRlckxhbmd1YWdlOjpTY2FsYSA9PiB2ZWMhWwogICAgICAgICAgICAiXG5jbGFzcyAiLCAiXG5vYmplY3QgIiwKICAgICAgICAgICAgIlxuZGVmICIsICJcbnZhbCAiLCAiXG52YXIgIiwKICAgICAgICAgICAgIlxuaWYgIiwgIlxuZm9yICIsICJcbndoaWxlICIsICJcbm1hdGNoICIsICJcbmNhc2UgIiwKICAgICAgICAgICAgIlxuXG4iLCAiXG4iLCAiICIsICIiLAogICAgICAgIF0sCiAgICAgICAgU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2U6OlN3aWZ0ID0+IHZlYyFbCiAgICAgICAgICAgICJcbmZ1bmMgIiwgIlxuY2xhc3MgIiwgIlxuc3RydWN0ICIsICJcbmVudW0gIiwKICAgICAgICAgICAgIlxuaWYgIiwgIlxuZm9yICIsICJcbndoaWxlICIsICJcbmRvICIsICJcbnN3aXRjaCAiLCAiXG5jYXNlICIsCiAgICAgICAgICAgICJcblxuIiwgIlxuIiwgIiAiLCAiIiwKICAgICAgICBdLAogICAgICAgIFN1cHBvcnRlZFRleHRTcGxpdHRlckxhbmd1YWdlOjpNYXJrZG93biA9PiB2ZWMhWwogICAgICAgICAgICAiXG4jIyAiLCAiXG4jIyMgIiwgIlxuIyMjIyAiLCAiXG4jIyMjIyAiLCAiXG4j
IyMjIyMgIiwKICAgICAgICAgICAgImBgYFxuXG4iLCAiXG5cbioqKlxuXG4iLCAiXG5cbi0tLVxuXG4iLCAiXG5cbl9fX1xuXG4iLAogICAgICAgICAgICAiXG5cbiIsICJcbiIsICIgIiwgIiIsCiAgICAgICAgXSwKICAgICAgICBTdXBwb3J0ZWRUZXh0U3BsaXR0ZXJMYW5ndWFnZTo6TGF0ZXggPT4gdmVjIVsKICAgICAgICAgICAgIlxuXFxjaGFwdGVyeyIsICJcblxcc2VjdGlvbnsiLCAiXG5cXHN1YnNlY3Rpb257IiwgIlxuXFxzdWJzdWJzZWN0aW9ueyIsCiAgICAgICAgICAgICJcblxcYmVnaW57ZW51bWVyYXRlfSIsICJcblxcYmVnaW57aXRlbWl6ZX0iLCAiXG5cXGJlZ2lue2Rlc2NyaXB0aW9ufSIsICJcblxcYmVnaW57bGlzdH0iLCAiXG5cXGJlZ2lue3F1b3RlfSIsICJcblxcYmVnaW57cXVvdGF0aW9ufSIsICJcblxcYmVnaW57dmVyc2V9IiwgIlxuXFxiZWdpbnt2ZXJiYXRpbX0iLAogICAgICAgICAgICAiXG5cXGJlZ2lue2FsaWdufSIsICIkJCIsICIkIiwKICAgICAgICAgICAgIlxuXG4iLCAiXG4iLCAiICIsICIiLAogICAgICAgIF0sCiAgICAgICAgU3VwcG9ydGVkVGV4dFNwbGl0dGVyTGFuZ3VhZ2U6Okh0bWwgPT4gdmVjIVsKICAgICAgICAgICAgIjxib2R5PiIsICI8ZGl2PiIsICI8cD4iLCAiPGJyPiIsICI8bGk+IiwgIjxoMT4iLCAiPGgyPiIsICI8aDM+IiwgIjxoND4iLCAiPGg1PiIsICI8aDY+IiwgIjxzcGFuPiIsICI8dGFibGU+IiwgIjx0cj4iLCAiPHRkPiIsICI8dGg+IiwgIjx1bD4iLCAiPG9sPiIsICI8aGVhZGVyPiIsICI8Zm9vdGVyPiIsICI8bmF2PiIsCiAgICAgICAgICAgICI8aGVhZD4iLCAiPHN0eWxlPiIsICI8c2NyaXB0PiIsICI8bWV0YT4iLCAiPHRpdGxlPiIsCiAgICAgICAgICAgICIgIiwgIiIsCiAgICAgICAgXSwKICAgICAgICBTdXBwb3J0ZWRUZXh0U3BsaXR0ZXJMYW5ndWFnZTo6U29sID0+IHZlYyFbCiAgICAgICAgICAgICJcbnByYWdtYSAiLCAiXG51c2luZyAiLAogICAgICAgICAgICAiXG5jb250cmFjdCAiLCAiXG5pbnRlcmZhY2UgIiwgIlxubGlicmFyeSAiLAogICAgICAgICAgICAiXG5jb25zdHJ1Y3RvciAiLCAiXG50eXBlICIsICJcbmZ1bmN0aW9uICIsICJcbmV2ZW50ICIsICJcbm1vZGlmaWVyICIsICJcbmVycm9yICIsICJcbnN0cnVjdCAiLCAiXG5lbnVtICIsCiAgICAgICAgICAgICJcbmlmICIsICJcbmZvciAiLCAiXG53aGlsZSAiLCAiXG5kbyB3aGlsZSAiLCAiXG5hc3NlbWJseSAiLAogICAgICAgICAgICAiXG5cbiIsICJcbiIsICIgIiwgIiIsCiAgICAgICAgXSwKICAgIH0KfQoKZm4gbWFpbigpIHsKICAgIGxldCBzZXBhcmF0b3JzID0gZ2V0X3NlcGFyYXRvcnNfZm9yX2xhbmd1YWdlKFN1cHBvcnRlZFRleHRTcGxpdHRlckxhbmd1YWdlOjpSdXN0KTsKICAgIGZvciBzZXBhcmF0b3IgaW4gc2VwYXJhdG9ycyB7CiAgICAgICAgcHJpbnRsbiEoInt9Iiwgc2VwYXJhdG9yKTsKICAgIH0KfQ==