我一直在 HP Proliant 服务器上运行高度并发的应用程序。该应用程序是我用 erlang 编写的文件系统索引器。它为在文件系统上找到的每个文件夹生成一个进程,并将所有文件路径记录在碎片化的 Mnesia 数据库中。 (数据库由disc_only_copies
可以查看表的类型及其文件系统的屏幕截图here.)
执行遍历文件系统的高强度工作的代码片段如下所示:
%%% -------- COPYRIGHT NOTICE --------------------------------------------------------------------
%% @author Muzaaya Joshua, <[email protected]> [http://joshanderlang.blogspot.com]
%% @version 1.0 free software, but modification prohibited
%% @copyright Muzaaya Joshua (file_scavenger-1.0) 2011 - 2012 . All rights reserved
%% @reference <a href="http://www.erlang.org">OpenSource Erlang WebSite</a>
%%
%%% ---------------- EDOC INTRODUCTION TO THE MODULE ----------------------------------------------
%% @doc This module provides the low level APIs for reading, writing,
%% searching, joining and moving within directories.The module implementation
%% took place on @date at @time.
%% @end
-module(file_scavenger_utilities).
%%% ------- EXPORTS -------------------------------------------------------------------------------
-compile(export_all).
%%% ------- INCLUDES -----------------------------------------------------------------------------
%%% -------- MACROS ------------------------------------------------------------------------------
-define(IS_FOLDER(X),filelib:is_dir(X)).
-define(IS_FILE(X),filelib:is_file(X)).
-define(FAILED_TO_LIST_DIR(X),error_logger:error_report(["*** File Scavenger Utilities Error ***** ",{error,"Failed to List Directory"},{directory,X}])).
-define(NOT_DIR(X),error_logger:error_report(["*** File Scavenger Utilities Error ***** ",{error,"Not a Directory"},{alleged,X}])).
-define(NOT_FILE(X),error_logger:error_report(["*** File Scavenger Utilities Error ***** ",{error,"Not a File"},{alleged,X}])).
%%%--------- TYPES -------------------------------------------------------------------------------
%% @type dir() = string().
%% Must be containing forward slashes, not back slashes. Must not end with a slash
%% after the exact directory.e.g this is wrong: "C:/Program Files/SomeDirectory/"
%% but this is right: "C:/Program Files/SomeDirectory"
%% @type file_path() = string().
%% Must be containing forward slashes, not back slashes.
%% Should include the file extension as well e.g "C:/Program Files/SomeFile.pdf"
%% -----------------------------------------------------------------------------------------------
%% @doc Enters a directory and executes the fun ForEachFileFound/2 for each file it finds
%% If it finds a directory, it executes the fun %% ForEachDirFound/2.
%% Both funs above take the parent Dir as the first Argument. Then, it will spawn an
%% erlang process that will spread the found Directory too in the same way as the parent directory
%% was spread. The process of spreading goes on and on until every File (wether its in a nested
%% Directory) is registered by its full path.
%% @end
%%
%% @spec spread_directory(dir(),dir(),funtion(),function())-> ok.
spread_directory(Dir,Top_Directory,ForEachFileFound,ForEachDirFound) when is_function(ForEachFileFound),is_function(ForEachDirFound) ->
case ?IS_FOLDER(Dir) of
false -> ?NOT_DIR(Dir);
true ->
F = fun(X)->
FileOrDir = filename:absname_join(Dir,X),
case ?IS_FOLDER(FileOrDir) of
true ->
(catch ForEachDirFound(Top_Directory,FileOrDir)),
spawn(fun() -> ?MODULE:spread_directory(FileOrDir,Top_Directory,ForEachFileFound,ForEachDirFound) end);
false ->
case ?IS_FILE(FileOrDir) of
false -> {error,not_a_file,FileOrDir};
true -> (catch ForEachFileFound(Top_Directory,FileOrDir))
end
end
end,
case file:list_dir(Dir) of
{error,_} -> ?FAILED_TO_LIST_DIR(Dir);
{ok,List} -> lists:foreach(F,List)
end
end.
功能spread_directory/4
是通用的,需要两个funs
。一个乐趣:ForEachFileFound/2
与最顶层目录、找到的文件一起使用,并用它做任何事情以及其他有趣的事情:ForEachDirFound/2
与最顶层目录(它找到的文件夹)一起使用,并以任何它想要的方式使用它。
我用于此应用程序的启动脚本可确保 erlang 能够生成尽可能多的进程。一旦进程完成对文件夹的索引,它就会退出。
#!/usr/bin/env sh
echo "Starting File Scavenger System. Layer 1 on the P2P File Sharing System....."
erl \
-name [email protected] \
+P 13421779 \
-pa ./ebin ./lib/*/ebin ./include \
-mnesia dir '"./database"' \
-mnesia dump_log_write_threshold 10000 \
-eval "application:load(file_scavenger)" \
-eval "application:start(file_scavenger)"
有一个 gen_server 将密集模块与我记录所有路径的数据库连接起来。下面显示了开始 spread_directory 工作的片段:
handle_cast(index_dirs,#scavenger{directory_paths = Dirs} = State)->
{File,Folder} = case {State#scavenger.verbose,State#scavenger.verbose_to} of
{true,tty} ->
{
fun(TopDir,Fl)->
io:format(" File: ~p~n",[Fl]),
file_scavenger_database:insert_file(filename:basename(Fl),file,Fl,TopDir,filename:extension(Fl))
end,
fun(TopDir,Fd) ->
io:format(" Folder: ~p~n",[Fd]),
file_scavenger_database:insert_file(Fd,folder,Fd,TopDir,undefined)
end
};
{true,SomeFile}->
{
fun(TopDir,Fl)->
os:cmd("echo File: " ++ Fl ++ " >> " ++ SomeFile),
file_scavenger_database:insert_file(filename:basename(Fl),file,Fl,TopDir,filename:extension(Fl))
end,
fun(TopDir,Fd)->
os:cmd("echo Folder: " ++ Fd ++ " >> " ++ SomeFile),
file_scavenger_database:insert_file(Fd,folder,Fd,TopDir,undefined)
end
}
end,
Main = fun(Dir) ->
error_logger:info_msg("*** File scavenger Server indexing directory: ~p~n",[Dir]),
spawn(fun() -> file_scavenger_utilities:spread_directory(Dir,Dir,File,Folder) end)
end,
lists:foreach(Main,Dirs),
{noreply,State};
handle_cast(stop, State) -> {stop, normal, State}.
更多源详细信息可以在整个应用程序中找到。
应用程序的整个源代码和构建可以在这里找到:File_scavenger-1.0.zip.
现在,我在服务器(HP Proliant G6,包含 Intel 处理器(2 个处理器,每个 4 个内核,每个内核速度 2.4 GHz,8 MB 缓存大小)、20 GB RAM 大小、1.5 TB 磁盘空间)上启动应用程序。现在,2这些高功率机器由我们使用。系统数据库应该在两台服务器之间复制。每台服务器都运行 Solaris 10, 64 位),其终端现在如下所示:
bash-3.00# sh file_scavenger.sh
Starting File Scavenger System. Layer 1 on the P2P File Sharing System.....
Erlang R14B03 (erts-5.8.4) [source] [smp:8:8] [rq:8] [async-threads:0] [hipe] [kernel-poll:false]
Eshell V5.8.4 (abort with ^G)
([email protected])1>
=INFO REPORT==== 18-Aug-2011::09:36:04 ===
Starting File Scavenger Database......
=INFO REPORT==== 18-Aug-2011::09:36:04 ===
Database Successfully Started....
=INFO REPORT==== 18-Aug-2011::09:36:04 ===
Starting File Scavenger Database......
=INFO REPORT==== 18-Aug-2011::09:36:04 ===
Database Successfully Started....
=INFO REPORT==== 18-Aug-2011::09:36:04 ===
File Scavenger Server starting with default verbose settings....
(fil[email protected])1> file_scavenger_server:index_dirs().
服务器开始运行并向终端详细显示它找到的所有文件和文件夹。服务器配备过多 RAM (20 GB) 和 Swap 空间(Swap 为 16 GB)。然而,它运行了大约 18 个小时,最后,erlang 虚拟机报告了以下内容:
File: "/proc/4324/root/opt/csw/gcc4/share/locale/ja/LC_MESSAGES/gcc.mo"
Folder: "/proc/4324/root/opt/csw/gcc4/share/locale/da"
Folder: "/proc/4324/root/opt/csw/gcc4/share/locale/es/LC_MESSAGES"
File: "/proc/4324/root/proc/4984/root/.thumbnails/normal/dc259e3897e8af4b379c6d956b6c1393.png"
File: "/proc/4324/root/proc/4984/root/.thumbnails/fail/gnome-thumbnail-factory/223c19786421b7101d14075bdec46f61.png"
File: "/proc/4324/root/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/install-tools/mkheaders"
File: "/proc/4324/root/opt/csw/gcc4/libexec/gcc/i386-pc-solaris2.10/4.5.1/cc1plus"
File: "/proc/4324/root/opt/csw/gcc4/lib/libsupc++.la"
Crash dump was written to: erl_crash.dump
eheap_alloc: Cannot allocate 153052320 bytes of memory (of type "heap").
Abort - core dumped
bash-3.00#
问题1.有了如此强大的服务器,为什么操作系统无法为应用程序(它是唯一正在运行的应用程序)提供这样的内存?
问题2。我启动的 Erlang 模拟器被指示能够生成所需数量的进程。价值+P 13421779
。 Erlang VM 是否无法访问该内存或无法将其分配给其进程?
问题3。对于 Solaris,它看到一个进程:epmd
,可能包含并启动数千个微线程。我可以对 Solaris 进行哪些配置,以便永远不会停止我的应用程序,无论它有多少“内存消耗”?可用交换空间为 16 GB,RAM 20 GB,老实说,肯定有问题。
问题 4.我可以对 Erlang 模拟器进行哪些配置,以避免这些堆内存崩溃转储,特别是当它可能需要的所有内存在服务器上可用时?如果 Erlang 仍然无法将这些内存分配给简单的文件系统索引器(以及它的高度并发),我将如何在此服务器上运行更多消耗内存的应用程序?
最后,我可以做的所有其他调整,以避免在如此强大的硬件上出现堆内存问题,都是受欢迎的。提前致谢