1 module minijson.lib;
2 
3 import std : ctRegex, matchAll, matchFirst;
4 
5 import despacer.simd_check : supports_sse4_1, supports_avx2;
6 
7 const tokenizerWithComment = ctRegex!(`"|(/\*)|(\*/)|(//)|\n|\r|\[|]`, "g");
8 const tokenizerNoComment = ctRegex!(`[\n\r"[]]`, "g");
9 
10 /**
11   Minify the given JSON string
12 
13   Params:
14     jsonString  = the json string you want to minify
15     hasComment = a boolean to support comments in json. Default: `false`.
16 
17   Return:
18     the minified json string
19 */
20 string minifyString(in string jsonString, in bool hasComment = false) @trusted
21 {
22   auto in_string = false;
23   auto in_multiline_comment = false;
24   auto in_singleline_comment = false;
25   string result;
26   size_t from = 0;
27   auto rightContext = "";
28 
29   const tokenizer = !hasComment ? tokenizerNoComment : tokenizerWithComment;
30 
31   auto match = jsonString.matchAll(tokenizer);
32 
33   while (!match.empty())
34   {
35     const matchFrontHit = match.front().hit();
36 
37     rightContext = match.post();
38 
39     // update from for the next iteration
40     const prevFrom = from;
41     from = jsonString.length - rightContext.length; // lastIndex
42 
43     const notInComment = (!in_multiline_comment && !in_singleline_comment);
44     const noCommentOrNotInComment = !hasComment || notInComment;
45 
46     if (noCommentOrNotInComment)
47     {
48       auto leftContextSubstr = match.pre()[prevFrom .. $];
49       const noLeftContext = leftContextSubstr.length == 0;
50       if (!noLeftContext) {
51         if (!in_string)
52         {
53           leftContextSubstr = remove_spaces(leftContextSubstr);
54         }
55         result ~= leftContextSubstr;
56       }
57       if (matchFrontHit == "\"")
58       {
59         if (!in_string || noLeftContext || hasNoSlashOrEvenNumberOfSlashes(leftContextSubstr))
60         {
61           // start of string with ", or unescaped " character found to end string
62           in_string = !in_string;
63         }
64         --from; // include " character in next catch
65         rightContext = jsonString[from .. $];
66       }
67     }
68     // comments
69     if (hasComment && !in_string)
70     {
71       if (notInComment)
72       {
73         if (matchFrontHit == "/*")
74         {
75           in_multiline_comment = true;
76         }
77         else if (matchFrontHit == "//")
78         {
79           in_singleline_comment = true;
80         }
81         else if (notSlashAndNoSpaceOrBreak(matchFrontHit))
82         {
83           result ~= matchFrontHit;
84         }
85       }
86       else if (in_multiline_comment && !in_singleline_comment && matchFrontHit == "*/")
87       {
88         in_multiline_comment = false;
89       }
90       else if (!in_multiline_comment && in_singleline_comment && (matchFrontHit == "\n" || matchFrontHit == "\r"))
91       {
92         in_singleline_comment = false;
93       }
94     }
95     else if (!hasComment && notSlashAndNoSpaceOrBreak(matchFrontHit))
96     {
97       result ~= matchFrontHit;
98     }
99     match.popFront();
100   }
101   result ~= rightContext;
102   return result;
103 }
104 
105 private bool hasNoSlashOrEvenNumberOfSlashes(in string leftContextSubstr) @safe @nogc
106 {
107   size_t slashCount = 0;
108 
109   // NOTE leftContextSubstr.length is not 0 (checked outside of the function)
110   size_t index = leftContextSubstr.length - 1;
111 
112   // loop over the string backwards and find `\`
113   while (leftContextSubstr[index] == '\\')
114   {
115     slashCount += 1;
116 
117     index -= 1;
118   }
119   // no slash or even number of slashes
120   return slashCount % 2 == 0;
121 }
122 
123 private bool notSlashAndNoSpaceOrBreak(const ref string matchFrontHit) @safe
124 {
125   return matchFrontHit != "\"" && hasNoSpace(matchFrontHit);
126 }
127 
128 /** Removes spaces from the original string */
129 private string remove_spaces(string str) @trusted nothrow
130 {
131   static if (supports_sse4_1())
132   {
133     import despacer.despacer : sse4_despace_branchless_u4;
134 
135     // this wrapper reduces the overall time by 15 compared to d_sse4_despace_branchless_u4 because of no dup and toStringz
136     auto cstr = cast(char*) str;
137     const length = str.length;
138     return str[0 .. sse4_despace_branchless_u4(cstr, length)];
139   }
140   else
141   {
142     const spaceOrBreakRegex = ctRegex!(`\s`);
143     leftContextSubstr.replaceAll(spaceOrBreakRegex, "");
144   }
145 }
146 
147 /** Check if the given string has space  */
148 private bool hasNoSpace(const ref string matchFrontHit) @trusted
149 {
150   static if (supports_avx2())
151   {
152     import despacer.despacer : avx2_hasspace;
153 
154     // the algorithm never checks for zero termination so toStringz is not needed
155     return !avx2_hasspace(cast(const char*) matchFrontHit, matchFrontHit.length);
156   }
157   else
158   {
159     const spaceOrBreakRegex = ctRegex!(`\s`);
160     return matchFrontHit.matchFirst(spaceOrBreakRegex).empty();
161   }
162 }
163 
164 /**
165   Minify the given files in place. It minifies the files in parallel.
166 
167   Params:
168     files = the paths to the files.
169     hasComment = a boolean to support comments in json. Default: `false`.
170 */
171 void minifyFiles(in string[] files, in bool hasComment = false)
172 {
173   import std.parallelism : parallel;
174   import std.file : readText, write;
175 
176   foreach (file; files.parallel())
177   {
178     write(file, minifyString(readText(file), hasComment));
179   }
180 }