Dev-cpp模仿者

在网页上展示C/C++代码时,如果没有语法高亮,就会显得很平淡。我们需要分别为每一个保留字、符号、注释添加粗体、高亮等。但每一份代码都手动添加的话,费时又费力,因此必须使用程序来自动生成。Dev-C++自带了这个功能,只需在文件>导出菜单中选择“到HTML”,就会提供一份当前源码的HTML副本。然而:

  1. 我当时竟然还没发现有这个工具(太惨了)
  2. 它不支持自动显示行号
  3. 它以span class=xxx的形式输出每一个标记,这样会使HTML文件占用的空间变得很大。现在已经没有浏览器还不支持CSS1的后代选择器了,我更愿意以自己的方式处理,如将i标签用作注释,em标签用于保留字,b标签用于标点符号,等等。这样子不是更加简洁吗?

另一个解决方案是,使用例如highlight.js之类的库。我不想那样做,因为

  1. 我还看不懂js代码,尤其是有些地方的判断用到了正则表达式,简直搞得我头晕脑胀的。对我来说,代码完全看不懂,开源了就跟没开似的。
  2. 个性化比较差,感觉网上到处都是这种类型的代码高亮,千篇一律。(虽然我也知道,改个CSS文件就行了吧。)
  3. 可能用到了一些高级特性,对旧浏览器的支持不好。
  4. 他们搞得太复杂啦!比如同时支持多语言。我暂时不需要那么多的功能,只想搞好我最迫切需要的C语言,来个极简化的代码。

于是我用自己最擅长的C语言开发了一个程序,可以用HTML打印C++代码,和Dev-C++中一样丰富多彩。其实我早在四年以前就有了这个想法,今天总算是实现啦!和原版生成的HTML效果几乎一样,除了一些特殊的地方会有差别,例如在宏定义的中间穿插注释时(具体的见test-all.c,第26行与第66行那里),这算是Dev-cpp模仿者目前版本一个已知的bug,以后会完善的。

源码如下:

  1. /*
  2. * Description: Dev-cpp Imitator
  3. * Version: 0.1
  4. * Date: 2025-08-15
  5. * Author: Shurui Zhang
  6. * Contact: https://qowk.xyz/contact/
  7. */
  8. #include <stdio.h>

  9. #include <ctype.h>

  10. #include <string.h>

  /* Buffer holding the source line that is currently being highlighted. */
  char l[200];

  /*
   * Construct that was still open when the previous line ended.
   * process_line() switches on this value to resume in the right place.
   */
  enum Line_State {
      none = 0,
      string_literal = 1,
      preprocessor_directive = 2,
      single_line_comment = 3,
      multiple_line_comment = 4
  };
  enum Line_State line_state;

  /* Token categories; each value is an index into the two tag tables below. */
  typedef enum {
      NUMBER = 0,
      KEYWORD = 1,
      STRING = 2,
      SYMBOL = 3,
      COMMENT = 4,
      PREPROCESSOR = 5,
      IDENTIFIER = 6
  } token_types;

  /* Opening HTML tag for every token category (IDENTIFIER has no tag). */
  const char *opening_tag[] = {
      "<a>",      /* NUMBER */
      "<em>",     /* KEYWORD */
      "<q>",      /* STRING */
      "<b>",      /* SYMBOL */
      "<i>",      /* COMMENT */
      "<p>",      /* PREPROCESSOR */
      ""          /* IDENTIFIER */
  };

  /* Closing HTML tag for every token category, in the same order. */
  const char *closing_tag[] = {
      "</a>",     /* NUMBER */
      "</em>",    /* KEYWORD */
      "</q>",     /* STRING */
      "</b>",     /* SYMBOL */
      "</i>",     /* COMMENT */
      "</p>",     /* PREPROCESSOR */
      ""          /* IDENTIFIER */
  };
  34. /* range: [start, end) */
  35. void outputHTML(token_types style, int start, int end)
  36. {
  37. static token_types last_style = IDENTIFIER;
  38. if (style != last_style) {
  39. /* merge consecutive elements */
  40. printf("%s%s", closing_tag[last_style], opening_tag[style]);
  41. last_style = style;
  42. }
  43. for (; start < end; start++) {
  44. /* check for escaped characters */
  45. if (l[start] == '<') printf("&lt;");
  46. else if (l[start] == '>') printf("&gt;");
  47. else if (l[start] == '&') printf("&amp;");
  48. else putchar(l[start]);
  49. }
  50. }
  51. int is_keyword(int start, int end)
  52. {
  53. const char *keywords[] = {
  54. "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand",
  55. "bitor", "bool", "break", "case", "catch", "char", "char16_t",
  56. "char32_t", "class", "compl", "const", "const_cast",
  57. "constexpr", "continue", "decltype", "default", "delete", "do",
  58. "double", "dynamic_cast", "else", "enum", "explicit", "export",
  59. "extern", "false", "float", "for", "friend", "goto", "if",
  60. "inline", "int", "long", "mutable", "namespace", "new",
  61. "noexcept", "not", "not_eq", "nullptr", "operator", "or",
  62. "or_eq", "private", "protected", "public", "register",
  63. "reinterpret_cast", "return", "short", "signed", "sizeof",
  64. "static", "static_assert", "static_cast", "struct", "switch",
  65. "template", "this", "thread_local", "throw", "true", "try",
  66. "typedef", "typeid", "typename", "union", "unsigned", "using",
  67. "virtual", "void", "volatile", "wchar_t", "while", "xor",
  68. "xor_eq"
  69. };
  70. char t;
  71. int i;
  72. t = l[end];
  73. l[end] = '\0';
  74. for (i = 0; i < sizeof keywords / sizeof *keywords; i++) {
  75. if (!strcmp(&l[start], keywords[i])) {
  76. l[end] = t;
  77. return 1;
  78. }
  79. }
  80. l[end] = t;
  81. return 0;
  82. }
  83. void process_line()
  84. {
  85. int head = 0, tail = 0;
  86. int len;
  87. /* #define DEBUG */
  88. #ifdef DEBUG

  89. for (; l[head]; head++) {
  90. printf("%x ", l[head]);
  91. }
  92. return;
  93. #endif

  94. len = strlen(l);
  95. if (len == sizeof l - 1) {
  96. fputs("line character buffer limit exceeded", stderr);
  97. return;
  98. }
  99. /* strip trivial LF and CR characters */
  100. if (l[len - 1] == '\n')
  101. l[--len] = '\0';
  102. if (l[len - 1] == '\r')
  103. l[--len] = '\0';
  104. switch (line_state)
  105. do {
  106. head = tail;
  107. default:
  108. if (isspace(l[tail])) {
  109. putchar(l[tail++]);
  110. continue;
  111. }
  112. if (isalpha(l[tail]) || l[tail] == '_') {
  113. do tail++;
  114. while (isalnum(l[tail]) || l[tail] == '_');
  115. outputHTML(is_keyword(head, tail) ?
  116. KEYWORD : IDENTIFIER, head, tail);
  117. continue;
  118. }
  119. if (l[tail] == '#') {
  120. case preprocessor_directive:
  121. /* repeat until meet comments */
  122. do tail++;
  123. while (l[tail] && (l[tail] != '/'
  124. || (l[tail + 1] != '/'
  125. && l[tail + 1] != '*')));
  126. if (!l[tail] && l[tail - 1] == '\\')
  127. line_state = preprocessor_directive;
  128. outputHTML(PREPROCESSOR, head, tail);
  129. if (!l[tail] && l[tail - 1] == '\\') return;
  130. continue;
  131. }
  132. if (l[tail] == '"') {
  133. tail++;
  134. case string_literal:
  135. /* When l[tail] = '\0', it is (maybe)
  136. a C++11 raw string literal, or syntax error */
  137. while (l[tail] != '"' && l[tail]) {
  138. if (l[tail] == '\\' && !l[++tail]) {
  139. /* line wrap */
  140. line_state = string_literal;
  141. outputHTML(STRING, head, tail);
  142. return;
  143. }
  144. tail++;
  145. }
  146. tail++;
  147. outputHTML(STRING, head, tail);
  148. continue;
  149. }
  150. if (l[tail] == '\'') {
  151. do {
  152. if (l[tail] == '\\' && l[tail + 1])
  153. tail++;
  154. tail++;
  155. } while (l[tail] != '\'' && l[tail]);
  156. tail++;
  157. outputHTML(IDENTIFIER, head, tail);
  158. continue;
  159. }
  160. if (l[tail] == '/' && l[tail + 1] == '/') {
  161. case single_line_comment:
  162. line_state = l[len - 1] == '\\' ?
  163. single_line_comment : none;
  164. outputHTML(COMMENT, head, len);
  165. return;
  166. }
  167. if (l[tail] == '/' && l[tail + 1] == '*') {
  168. tail += 2;
  169. case multiple_line_comment:
  170. /* repeat until the end of the multiple-line comment */
  171. while (l[tail] != '*' || l[tail + 1] != '/') {
  172. if (!l[tail]) {
  173. /* at the end of line, the multiple line comment will
  174. continue */
  175. line_state = multiple_line_comment;
  176. outputHTML(COMMENT, head, tail);
  177. return;
  178. }
  179. tail++;
  180. }
  181. tail += 2;
  182. outputHTML(COMMENT, head, tail);
  183. continue;
  184. }
  185. if (isdigit(l[tail]) ||
  186. (l[tail] == '.' && isdigit(l[tail + 1]))) {
  187. do {
  188. tail++;
  189. if (toupper(l[tail]) == 'E'
  190. && (l[tail + 1] == '+'
  191. || l[tail + 1] == '-')){
  192. tail += 2;
  193. }
  194. } while (isalnum(l[tail]) || l[tail] == '.');
  195. outputHTML(NUMBER, head, tail);
  196. continue;
  197. }
  198. if (ispunct(l[tail])) {
  199. tail++;
  200. outputHTML(SYMBOL, head, tail);
  201. continue;
  202. }
  203. if (l[tail]) {
  204. fputs("Unknown Error", stderr);
  205. return;
  206. }
  207. } while (l[tail]);
  208. line_state = none;
  209. }
  210. int main()
  211. {
  212. printf(
  213. "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN' "
  214. "'http://www.w3.org/TR/html4/strict.dtd'>"
  215. "<meta http-equiv=content-type "
  216. "content='text/html; charset=utf-8'>" /* charset=gb2312 */
  217. "<style type='text/css'>"
  218. "ol.devcppimitator i {color:#0078d7} "
  219. "ol.devcppimitator q "
  220. "{color:blue; font-weight:bold; quotes:none} "
  221. "ol.devcppimitator b {color:red} "
  222. "ol.devcppimitator em "
  223. "{color:black; font-weight:bold; font-style:normal} "
  224. "ol.devcppimitator a {color:purple} "
  225. "ol.devcppimitator p {display:inline; color:green} "
  226. "ol.devcppimitator li {white-space:pre} " /* ; tab-size:4 */
  227. /* tab-size is in CSS3 */
  228. );
  229. printf(
  230. "ol.devcppimitator {width:800px; margin:0 auto; "
  231. "padding-left:60px; border:solid 1px; "
  232. "font-family:Consolas,'Courier New',monospace}"
  233. "</style>"
  234. "<title>Dev-cpp Imitator</title>"
  235. "<div style='width:1000px; margin:0 auto; padding:10px;"
  236. "border:solid 1px'>"
  237. "<p>The sample is as follows:\n"
  238. "<ol class=devcppimitator>\n"
  239. );
  240. while (fgets(l, sizeof l, stdin)) {
  241. printf("<li>");
  242. process_line();
  243. /* flush buffer */
  244. outputHTML(IDENTIFIER, 0, 0);
  245. /* This is necessary, because an empty li element
  246. doesn't occupy a line, resulting in copy failures. */
  247. putchar('\n');
  248. }
  249. printf("</ol>\n</div>");
  250. return 0;
  251. }

编译与运行的方法:

先把上面的代码全部复制下来,保存为a.c。

在DOS/Windows上:

先用Dev-C++等工具直接编译,然后在cmd命令行中输入(使用重定向的方法)
a.exe < a.c > a.htm

如果你用的是PowerShell,则输入:

Get-Content a.c | .\a.exe | Out-File a.htm -Encoding utf8

在Linux上:

gcc a.c -o a.out
./a.out < a.c > a.htm

再用浏览器打开a.htm文件,就可以看到被高亮的代码了。

最后将a.htm中的整个ol元素(每行代码都会在HTML文件当中占用一行,相当于从第二行复制到倒数第二行)粘贴到你网页的HTML代码中间去,记得在头部添加文档级样式表,或者以外部样式表的方式引入,比如我网站的theme02.css。