Dev-cpp模仿者
在网页上展示C/C++代码时,如果没有语法高亮,就会显得很平淡。我们需要分别为每一个保留字、符号、注释添加粗体、高亮等。但每一份代码都手动添加的话,费时又费力,因此必须使用程序来自动生成。Dev-C++自带了这个功能,只需在文件>导出菜单中选择“到HTML”,就会提供一份当前源码的HTML副本。然而:
- 我当时竟然还没发现有这个工具(太惨了)
- 它不支持自动显示行号
- 它以 `<span class=xxx>` 的形式输出每一个标记,这样会使HTML文件占用的空间变得很大。现在已经没有浏览器还不支持CSS1的后代选择器了,我更愿意以自己的方式处理,如将i标签用作注释,em标签用于保留字,b标签用于标点符号,等等。这样子不是更加简洁吗?
另一个解决方案是,使用例如highlight.js之类的库。我不想那样做,因为
- 我还看不懂js代码,尤其是有些地方的判断用到了正则表达式,简直搞得我头晕脑胀的。对我来说,代码完全看不懂,开源了就跟没开似的。
- 个性化比较差,感觉网上到处都是这种类型的代码高亮,千篇一律。(虽然我也知道,改个CSS文件就行了吧。)
- 可能用到了一些高级特性,对旧浏览器的支持不好。
- 他们搞得太复杂啦!比如同时支持多语言。我暂时不需要那么多的功能,只想搞好我最迫切需要的C语言,来个极简化的代码。
于是我用自己最擅长的C语言开发了一个程序,可以用HTML打印C++代码,和Dev-C++中一样丰富多彩。其实我早在四年以前就有了这个想法,今天总算是实现啦!和原版生成的HTML效果几乎一样,除了一些特殊的地方会有差别,例如在宏定义的中间穿插注释时(具体的见test-all.c,第26行与第66行那里),这算是Dev-cpp模仿者目前版本一个已知的bug,以后会完善的。
源码如下:
/*
 * Description: Dev-cpp Imitator
 * Version: 0.1
 * Date: 2025-08-15
 * Author: Shurui Zhang
 * Contact: https://qowk.xyz/contact/
 */
-
#include <stdio.h>
#include <ctype.h>
#include <string.h>
-
/* Buffer holding the input line currently being processed. It is filled by
   fgets() in main() and read by outputHTML(), is_keyword() and
   process_line(). */
char l[200];
-
/* Lexer state that survives from one input line to the next. process_line()
   switches on line_state to resume inside a construct that the previous
   line left open. */
enum Line_State {
    none,                    /* not inside any multi-line construct */
    string_literal,          /* string continued with a trailing backslash */
    preprocessor_directive,  /* directive continued with a trailing backslash */
    single_line_comment,     /* "//" comment continued with a trailing backslash */
    multiple_line_comment    /* inside an unterminated block comment */
} line_state;
-
/* Token categories. The numeric value of each constant is used as the index
   into opening_tag[] and closing_tag[], so the order here must match the
   order of those tables. */
typedef enum {
    NUMBER,
    KEYWORD,
    STRING,
    SYMBOL,
    COMMENT,
    PREPROCESSOR,
    IDENTIFIER
} token_types;
-
/* Opening HTML tag for each token category, indexed by token_types value.
   The last entry is empty: identifiers are printed without any markup. */
const char *opening_tag[] = {
    "<a>",   /* NUMBER */
    "<em>",  /* KEYWORD */
    "<q>",   /* STRING */
    "<b>",   /* SYMBOL */
    "<i>",   /* COMMENT */
    "<p>",   /* PREPROCESSOR */
    ""       /* IDENTIFIER */
};
-
/* Closing HTML tag for each token category, indexed by token_types value.
   The last entry is empty: identifiers are printed without any markup. */
const char *closing_tag[] = {
    "</a>",   /* NUMBER */
    "</em>",  /* KEYWORD */
    "</q>",   /* STRING */
    "</b>",   /* SYMBOL */
    "</i>",   /* COMMENT */
    "</p>",   /* PREPROCESSOR */
    ""        /* IDENTIFIER */
};
-
- /* range: [start, end) */
- void outputHTML(token_types style, int start, int end)
- {
- static token_types last_style = IDENTIFIER;
- if (style != last_style) {
- /* merge consecutive elements */
- printf(
"%s%s"
, closing_tag[last_style], opening_tag[style]);
- last_style = style;
- }
- for (; start < end; start++) {
- /* check for escaped characters */
- if (l[start] == '<') printf(
"<"
);
- else if (l[start] == '>') printf(
">"
);
- else if (l[start] == '&') printf(
"&"
);
- else putchar(l[start]);
- }
- }
-
- int is_keyword(int start, int end)
- {
- const char *keywords[] = {
-
"alignas"
, "alignof"
, "and"
, "and_eq"
, "asm"
, "auto"
, "bitand"
,
-
"bitor"
, "bool"
, "break"
, "case"
, "catch"
, "char"
, "char16_t"
,
-
"char32_t"
, "class"
, "compl"
, "const"
, "const_cast"
,
-
"constexpr"
, "continue"
, "decltype"
, "default"
, "delete"
, "do"
,
-
"double"
, "dynamic_cast"
, "else"
, "enum"
, "explicit"
, "export"
,
-
"extern"
, "false"
, "float"
, "for"
, "friend"
, "goto"
, "if"
,
-
"inline"
, "int"
, "long"
, "mutable"
, "namespace"
, "new"
,
-
"noexcept"
, "not"
, "not_eq"
, "nullptr"
, "operator"
, "or"
,
-
"or_eq"
, "private"
, "protected"
, "public"
, "register"
,
-
"reinterpret_cast"
, "return"
, "short"
, "signed"
, "sizeof"
,
-
"static"
, "static_assert"
, "static_cast"
, "struct"
, "switch"
,
-
"template"
, "this"
, "thread_local"
, "throw"
, "true"
, "try"
,
-
"typedef"
, "typeid"
, "typename"
, "union"
, "unsigned"
, "using"
,
-
"virtual"
, "void"
, "volatile"
, "wchar_t"
, "while"
, "xor"
,
-
"xor_eq"
- };
- char t;
- int i;
- t = l[end];
- l[end] = '\0';
- for (i = 0; i < sizeof keywords / sizeof *keywords; i++) {
- if (!strcmp(&l[start], keywords[i])) {
- l[end] = t;
- return 1;
- }
- }
- l[end] = t;
- return 0;
- }
-
- void process_line()
- {
- int head = 0, tail = 0;
- int len;
-
- /* #define DEBUG */
#ifdef DEBUG
- for (; l[head]; head++) {
- printf(
"%x "
, l[head]);
- }
- return;
#endif
-
- len = strlen(l);
-
- if (len == sizeof l - 1) {
- fputs(
"line character buffer limit exceeded"
, stderr);
- return;
- }
-
- /* strip trivial LF and CR characters */
- if (l[len - 1] == '\n')
- l[--len] = '\0';
-
- if (l[len - 1] == '\r')
- l[--len] = '\0';
-
- switch (line_state)
- do {
- head = tail;
-
- default:
- if (isspace(l[tail])) {
- putchar(l[tail++]);
- continue;
- }
-
- if (isalpha(l[tail]) || l[tail] == '_') {
- do tail++;
- while (isalnum(l[tail]) || l[tail] == '_');
- outputHTML(is_keyword(head, tail) ?
- KEYWORD : IDENTIFIER, head, tail);
- continue;
- }
-
- if (l[tail] == '#') {
- case preprocessor_directive:
- /* repeat until meet comments */
- do tail++;
- while (l[tail] && (l[tail] != '/'
- || (l[tail + 1] != '/'
- && l[tail + 1] != '*')));
- if (!l[tail] && l[tail - 1] == '\\')
- line_state = preprocessor_directive;
- outputHTML(PREPROCESSOR, head, tail);
- if (!l[tail] && l[tail - 1] == '\\') return;
- continue;
- }
-
- if (l[tail] == '"') {
- tail++;
- case string_literal:
- /* When l[tail] = '\0', it is (maybe)
- a C++11 raw string literal, or syntax error */
- while (l[tail] != '"' && l[tail]) {
- if (l[tail] == '\\' && !l[++tail]) {
- /* line wrap */
- line_state = string_literal;
- outputHTML(STRING, head, tail);
- return;
- }
- tail++;
- }
- tail++;
- outputHTML(STRING, head, tail);
- continue;
- }
-
- if (l[tail] == '\'') {
- do {
- if (l[tail] == '\\' && l[tail + 1])
- tail++;
- tail++;
- } while (l[tail] != '\'' && l[tail]);
- tail++;
- outputHTML(IDENTIFIER, head, tail);
- continue;
- }
-
- if (l[tail] == '/' && l[tail + 1] == '/') {
- case single_line_comment:
- line_state = l[len - 1] == '\\' ?
- single_line_comment : none;
- outputHTML(COMMENT, head, len);
- return;
- }
-
- if (l[tail] == '/' && l[tail + 1] == '*') {
- tail += 2;
- case multiple_line_comment:
- /* repeat until the end of the multiple-line comment */
- while (l[tail] != '*' || l[tail + 1] != '/') {
- if (!l[tail]) {
- /* at the end of line, the multiple line comment will
- continue */
- line_state = multiple_line_comment;
- outputHTML(COMMENT, head, tail);
- return;
- }
- tail++;
- }
- tail += 2;
- outputHTML(COMMENT, head, tail);
- continue;
- }
-
- if (isdigit(l[tail]) ||
- (l[tail] == '.' && isdigit(l[tail + 1]))) {
- do {
- tail++;
- if (toupper(l[tail]) == 'E'
- && (l[tail + 1] == '+'
- || l[tail + 1] == '-')){
- tail += 2;
- }
- } while (isalnum(l[tail]) || l[tail] == '.');
- outputHTML(NUMBER, head, tail);
- continue;
- }
-
- if (ispunct(l[tail])) {
- tail++;
- outputHTML(SYMBOL, head, tail);
- continue;
- }
-
- if (l[tail]) {
- fputs(
"Unknown Error"
, stderr);
- return;
- }
- } while (l[tail]);
-
- line_state = none;
- }
-
- int main()
- {
- printf(
-
"<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN' "
-
"'http://www.w3.org/TR/html4/strict.dtd'>"
-
"<meta http-equiv=content-type "
-
"content='text/html; charset=utf-8'>"
/* charset=gb2312 */
-
"<style type='text/css'>"
-
"ol.devcppimitator i {color:#0078d7} "
-
"ol.devcppimitator q "
-
"{color:blue; font-weight:bold; quotes:none} "
-
"ol.devcppimitator b {color:red} "
-
"ol.devcppimitator em "
-
"{color:black; font-weight:bold; font-style:normal} "
-
"ol.devcppimitator a {color:purple} "
-
"ol.devcppimitator p {display:inline; color:green} "
-
"ol.devcppimitator li {white-space:pre} "
/* ; tab-size:4 */
- /* tab-size is in CSS3 */
- );
- printf(
-
"ol.devcppimitator {width:800px; margin:0 auto; "
-
"padding-left:60px; border:solid 1px; "
-
"font-family:Consolas,'Courier New',monospace}"
-
"</style>"
-
"<title>Dev-cpp Imitator</title>"
-
"<div style='width:1000px; margin:0 auto; padding:10px;"
-
"border:solid 1px'>"
-
"<p>The sample is as follows:\n"
-
"<ol class=devcppimitator>\n"
- );
-
- while (fgets(l, sizeof l, stdin)) {
- printf(
"<li>"
);
- process_line();
-
- /* flush buffer */
- outputHTML(IDENTIFIER, 0, 0);
-
- /* This is necessary, because an empty li element
- doesn't occupy a line, resulting in copy failures. */
- putchar('\n');
- }
-
- printf(
"</ol>\n</div>"
);
- return 0;
- }
编译与运行的方法:
先把上面的代码全部复制下来,保存为a.c。
在DOS/Windows上:
先用Dev-C++等工具直接编译,然后在cmd命令行中输入(使用重定向的方法)
a.exe < a.c > a.htm
如果你用PowerShell那么则是
Get-Content a.c | .\a.exe | Out-File a.htm -Encoding utf8
在Linux上:
gcc a.c -o a.out
./a.out < a.c > a.htm
再用浏览器打开a.htm文件,就可以看到被高亮的代码了。
最后将a.htm中的整个ol元素(每行代码都会在HTML文件当中占用一行,相当于从第二行复制到倒数第二行)粘贴到你网页的HTML代码中间去,记得在头部添加文档级样式表,或者以外部样式表的方式,就比如我网站的theme02.css。