module minijson.lib;

import std : ctRegex, matchAll, matchFirst;

import despacer.simd_check : supports_sse4_1, supports_avx2;

/// Tokenizer used when comments are supported: matches a double quote, the comment
/// delimiters `/*`, `*/` and `//`, a line break (`\n` or `\r`), or a square bracket.
const tokenizerWithComment = ctRegex!(`"|(/\*)|(\*/)|(//)|\n|\r|\[|]`, "g");
/// Tokenizer used when comments are not supported (no comment delimiters in the pattern).
const tokenizerNoComment = ctRegex!(`[\n\r"[]]`, "g");

/**
  Minify the given JSON string

  The input is split at the tokens found by the tokenizer. The text between two
  tokens is copied verbatim while inside a string literal, has its whitespace
  removed while outside of one, and is skipped while inside a comment.

  Params:
    jsonString = the json string you want to minify
    hasComment = a boolean to support comments in json. Default: `false`.

  Returns:
    the minified json string
*/
string minifyString(in string jsonString, in bool hasComment = false) @trusted
{
  auto in_string = false;
  auto in_multiline_comment = false;
  auto in_singleline_comment = false;
  string result;
  size_t from = 0;
  // Until the first token is found, the whole input is the not-yet-emitted tail.
  // (Starting from "" made token-free inputs such as `{}` minify to an empty string.)
  string rightContext = jsonString;

  const tokenizer = !hasComment ? tokenizerNoComment : tokenizerWithComment;

  auto match = jsonString.matchAll(tokenizer);

  while (!match.empty())
  {
    const matchFrontHit = match.front().hit();

    rightContext = match.post();

    // update from for the next iteration
    const prevFrom = from;
    from = jsonString.length - rightContext.length; // lastIndex

    const notInComment = (!in_multiline_comment && !in_singleline_comment);
    const noCommentOrNotInComment = !hasComment || notInComment;

    if (noCommentOrNotInComment)
    {
      // text between the previous token and this one
      auto leftContextSubstr = match.pre()[prevFrom .. $];
      const noLeftContext = leftContextSubstr.length == 0;
      if (!noLeftContext)
      {
        if (!in_string)
        {
          leftContextSubstr = remove_spaces(leftContextSubstr);
        }
        result ~= leftContextSubstr;
      }
      if (matchFrontHit == "\"")
      {
        if (!in_string || noLeftContext || hasNoSlashOrEvenNumberOfSlashes(leftContextSubstr))
        {
          // start of string with ", or unescaped " character found to end string
          in_string = !in_string;
        }
        --from; // include " character in next catch
        rightContext = jsonString[from .. $];
      }
    }
    // comments
    if (hasComment && !in_string)
    {
      if (notInComment)
      {
        if (matchFrontHit == "/*")
        {
          in_multiline_comment = true;
        }
        else if (matchFrontHit == "//")
        {
          in_singleline_comment = true;
        }
        else if (notSlashAndNoSpaceOrBreak(matchFrontHit))
        {
          result ~= matchFrontHit;
        }
      }
      else if (in_multiline_comment && !in_singleline_comment && matchFrontHit == "*/")
      {
        in_multiline_comment = false;
      }
      else if (!in_multiline_comment && in_singleline_comment && (matchFrontHit == "\n" || matchFrontHit == "\r"))
      {
        in_singleline_comment = false;
      }
    }
    else if (notSlashAndNoSpaceOrBreak(matchFrontHit))
    {
      // Reached when comments are disabled, or when the token sits inside a string
      // literal. In the latter case `[`, `]`, `//`, `/*` and `*/` are ordinary string
      // content and must be kept (they used to be dropped when hasComment was true).
      result ~= matchFrontHit;
    }
    match.popFront();
  }

  // Flush the text after the last token: drop it when it belongs to an unterminated
  // comment, copy it verbatim inside an unterminated string, despace it otherwise.
  if (!in_multiline_comment && !in_singleline_comment && rightContext.length != 0)
  {
    result ~= in_string ? rightContext : remove_spaces(rightContext);
  }
  return result;
}

/**
  Check whether the given text ends with an even number (including zero) of backslashes.

  Used to decide whether a `"` that directly follows the text is escaped or not.
  The backward scan stops at the start of the text, so a text that is empty or
  consists only of backslashes is handled without indexing out of bounds.
*/
private bool hasNoSlashOrEvenNumberOfSlashes(in string leftContextSubstr) @safe @nogc
{
  size_t slashCount = 0;

  // loop over the string backwards and count the trailing `\`
  size_t index = leftContextSubstr.length;
  while (index > 0 && leftContextSubstr[index - 1] == '\\')
  {
    slashCount += 1;

    index -= 1;
  }
  // no slash or even number of slashes
  return slashCount % 2 == 0;
}

/** Check that the given token is not a double quote and contains no whitespace */
private bool notSlashAndNoSpaceOrBreak(const ref string matchFrontHit) @safe
{
  return matchFrontHit != "\"" && hasNoSpace(matchFrontHit);
}

/**
  Removes spaces from the original string

  Not marked `nothrow`: the std.regex fallback is not guaranteed to be nothrow.
*/
private string remove_spaces(string str) @trusted
{
  static if (supports_sse4_1())
  {
    import despacer.despacer : sse4_despace_branchless_u4;

    // this wrapper reduces the overall time by 15 compared to d_sse4_despace_branchless_u4 because of no dup and toStringz
    // NOTE(review): the cast drops `immutable`; presumably the despacer compacts the
    // buffer in place - confirm that callers never pass read-only memory.
    auto cstr = cast(char*) str;
    const length = str.length;
    return str[0 .. sse4_despace_branchless_u4(cstr, length)];
  }
  else
  {
    import std.regex : replaceAll;

    const spaceOrBreakRegex = ctRegex!(`\s`);
    return str.replaceAll(spaceOrBreakRegex, "");
  }
}

/** Check if the given string has no space */
private bool hasNoSpace(const ref string matchFrontHit) @trusted
{
  static if (supports_avx2())
  {
    import despacer.despacer : avx2_hasspace;

    // the algorithm never checks for zero termination so toStringz is not needed
    return !avx2_hasspace(cast(const char*) matchFrontHit, matchFrontHit.length);
  }
  else
  {
    const spaceOrBreakRegex = ctRegex!(`\s`);
    return matchFrontHit.matchFirst(spaceOrBreakRegex).empty();
  }
}

/**
  Minify the given files in place. It minifies the files in parallel.

  Params:
    files = the paths to the files.
    hasComment = a boolean to support comments in json. Default: `false`.
*/
void minifyFiles(in string[] files, in bool hasComment = false)
{
  import std.parallelism : parallel;
  import std.file : readText, write;

  foreach (file; files.parallel())
  {
    // each file is read, minified and overwritten with the result
    write(file, minifyString(readText(file), hasComment));
  }
}