diff --git a/apps/roster/src/roster_oam.erl b/apps/roster/src/roster_oam.erl index 9ba81e731afb4b0fab447a68394e9494ab0d6e33..0000986e59e7e706e3c8ff4621428b423c686111 100644 --- a/apps/roster/src/roster_oam.erl +++ b/apps/roster/src/roster_oam.erl @@ -5,8 +5,13 @@ , ensure_tables_loaded/0 , ensure_tables_loaded/2 , revert_db_sync/1 + , rename_node/1 + , commit_rename/0 + , undo_db_clone/0 , remove_replicas/1 ]). +-define(LOG(Fmt, Args), io:fwrite(Fmt ++ "~n", Args)). + node_running() -> lists:keymember(roster, 1, application:which_applications()). @@ -14,6 +19,10 @@ disable() -> [stop_app(A) || A <- apps_to_stop()], ok. +%% This function re-enables a node having been deactivated by `disable/0`. +%% It expects the other node to be stopped, removes changes made by +%% db cloning, then restarts all applications that are expected to run. +%% revert_db_sync(DelNode) -> %% setting master nodes won't matter if the tables are already %% loaded, but if the node has restarted and is hanging on table @@ -22,16 +31,121 @@ revert_db_sync(DelNode) -> true -> {error, {still_running, DelNode}}; false -> - mnesia:set_master_nodes(mnesia:system_info(db_nodes) -- [DelNode]), [stop_app(A) || A <- lists:reverse(apps_to_restart())], - remove_replicas(DelNode), - application:start(mnesia, permanent), - ok = ensure_tables_loaded(), - mnesia:set_master_nodes([]), % resets master_nodes setting + undo_db_clone(DelNode), [application:ensure_all_started(A) || A <- apps_to_restart()], ok end. +%% This is equivalent to `undo_db_clone(OtherNode)` where OtherNode +%% is the single other node known to mnesia. The assumption is that +%% we have tried doing a `rename_node/1`, adding one new node to the +%% schema, and now want to undo those changes. Run in the shell of the +%% original node. +undo_db_clone() -> + [DelNode] = mnesia:system_info(db_nodes) -- [node()], + undo_db_clone(DelNode). + +%% This function reverts the changes done by `rename_node/1`. Run it in the +%% shell of the original node. 
%% It removes the other node from the schema, also removing the replicas.
%% Then it ensures that the tables are loaded, setting mnesia master nodes
%% temporarily to ensure that the node will actually start loading the tables.
%%
%% DelNode is the node to drop. Returns `ok`; crashes with a badmatch if
%% mnesia cannot be started or `ensure_tables_loaded/0` does not return `ok`.
undo_db_clone(DelNode) ->
    mnesia:set_master_nodes(mnesia:system_info(db_nodes) -- [DelNode]),
    remove_replicas(DelNode),
    ok = application:ensure_started(mnesia, permanent),
    ok = ensure_tables_loaded(),
    mnesia:set_master_nodes([]), % resets master_nodes setting
    ok.

%% This is supposed to be run as
%% `erl -name NewName -setcookie OldCookie -eval 'roster_oam:rename_node(OldNode)'`
%%
%% Starts mnesia with OldNode as an extra db node, then clones the database
%% of OldNode onto the current node. Returns whatever `clone_mnesia_db/1`
%% returns.
rename_node(OldNode) ->
    ?LOG("Wohoo!!! OldNode = ~p", [OldNode]),
    %% Fail right here if mnesia cannot be started, instead of crashing
    %% later inside the cloning code with a less obvious reason.
    ok = mnesia:start([{extra_db_nodes, [OldNode]}]),
    clone_mnesia_db(OldNode).

%% Run this in the shell of the new node once you are ready to switch over.
%% It will stop the old node, rewrite the node name in the vm.args
%% (saving a copy of the original), then stop the current node.
%% Start it again using `make start`.
%%
%% Crashes with a badmatch unless mnesia knows exactly one db node besides
%% the current one, or if stopping the old node or rewriting vm.args fails.
commit_rename() ->
    [OldNode] = mnesia:system_info(db_nodes) -- [node()],
    ok = stop_node(OldNode),
    ok = modify_vm_args(OldNode),
    init:stop().

%% Stops node N by calling `init:stop/0` on it and waits for the
%% corresponding `nodedown` message.
%%
%% Returns `ok` if N is not among the connected nodes or goes down within
%% 10 seconds, and `error` if no `nodedown` arrives in that time.
stop_node(N) ->
    case lists:member(N, nodes()) of
        true ->
            erlang:monitor_node(N, true),
            try
                rpc:call(N, init, stop, []),
                receive
                    {nodedown, N} ->
                        ok
                after 10000 ->
                    ?LOG("No nodedown after 10 seconds", []),
                    error
                end
            after
                %% Always drop the node monitor, whatever the outcome.
                erlang:monitor_node(N, false)
            end;
        false ->
            ok
    end.

%% Replaces the name of OldNode with the name of the current node in the
%% file "vm.args" (relative to the current working directory).
%%
%% Before the file is overwritten, a copy is saved as
%% "vm.args.old.<MegaSecs>.<Secs>.<MicroSecs>". Every occurrence of the old
%% name is replaced, not only the first one.
%%
%% Returns `ok` on success, `{error, {node_name_not_found, OldNode}}` if the
%% old name does not occur in the file (nothing is written in that case),
%% or the `{error, Reason}` of `file:write_file/2`. Crashes with a badmatch
%% if the file cannot be read or the backup copy cannot be created.
modify_vm_args(OldNode) ->
    OldName = atom_to_binary(OldNode, latin1),
    NewName = atom_to_binary(node(), latin1),
    {ok, Bin} = file:read_file("vm.args"),
    case binary:match(Bin, OldName) of
        nomatch ->
            %% Writing the file back unchanged would make the rename look
            %% successful although the node would restart under its old name.
            {error, {node_name_not_found, OldNode}};
        _Found ->
            {A, B, C} = os:timestamp(),
            {ok, _} = file:copy("vm.args", str("vm.args.old.~w.~w.~w", [A, B, C])),
            file:write_file("vm.args", binary:replace(Bin, OldName, NewName, [global]))
    end.

%% Formats Args according to Fmt (as `io_lib:format/2`) and returns the
%% result as a flat string.
str(Fmt, Args) ->
    lists:flatten(io_lib:format(Fmt, Args)).

%% Clones the mnesia database of Node onto the current node.
%%
%% Makes the local schema a disc_copies table, gives the current node a
%% replica of every table returned by `tabs/0` (using the storage type the
%% table has on Node), waits for the tables via `ensure_tables_loaded/0` and
%% finally prints `mnesia:info/0`. Returns the result of `mnesia:info/0`.
clone_mnesia_db(Node) ->
    {atomic, ok} = ensure_table_copy_type(schema, disc_copies),
    ?LOG("Mnesia connected to ~p", [Node]),
    Tabs = tabs(),
    add_me_to_tabs(Tabs, Node),
    %% Do not silently drop a failed table load; report it, but still go on
    %% to print the mnesia status so the operator can inspect the node.
    case ensure_tables_loaded() of
        ok ->
            ok;
        NotOk ->
            ?LOG("ensure_tables_loaded() returned ~p", [NotOk])
    end,
    mnesia:info().

%% Makes sure the current node holds a replica of Tab with storage type Type.
%%
%% Adds a table copy if the node has none, changes the copy type if it
%% differs from Type, and does nothing if it already matches. Returns
%% `{atomic, ok}` or the result of the mnesia call that was made.
ensure_table_copy_type(Tab, Type) ->
    case mnesia_lib:storage_type_at_node(node(), Tab) of
        unknown ->
            mnesia:add_table_copy(Tab, node(), Type);
        Type ->
            {atomic, ok};
        _Other ->
            mnesia:change_table_copy_type(Tab, node(), Type)
    end.

%% Gives the current node a replica of each table in Tabs, using the storage
%% type that the table has on OrigNode, and logs the time spent per table.
%%
%% Returns `ok`; crashes with a badmatch as soon as one table cannot be
%% replicated.
add_me_to_tabs(Tabs, OrigNode) ->
    TabsAndTypes = tabs_and_types(Tabs, OrigNode), % [{Tab, Type}]
    %% Column widths used to left-align the type and table names in the log.
    Wy = integer_to_list(max_length([Ty || {_, Ty} <- TabsAndTypes])),
    Wt = integer_to_list(max_length([T || {T, _} <- TabsAndTypes])),
    %% The format string is the same for every table, so build it once.
    Fmt = "Ensured local replica (~-" ++ Wy ++ "w) of "
          "~-" ++ Wt ++ "w (~w ms)",
    lists:foreach(
        fun({T, Type}) ->
            %% Monotonic time: a wall-clock adjustment during the copy must
            %% not distort (or even negate) the reported duration.
            T0 = erlang:monotonic_time(millisecond),
            {atomic, ok} = ensure_table_copy_type(T, Type),
            T1 = erlang:monotonic_time(millisecond),
            ?LOG(Fmt, [Type, T, T1 - T0])
        end,
        TabsAndTypes
    ),
    ?LOG("Ensured local copies for tabs", []),
    ok.

%% Returns the length of the longest atom name in L, or 0 if L is empty.
max_length(L) ->
    %% The leading 0 keeps lists:max/1 from crashing on an empty list and
    %% does not change the result for a non-empty one.
    lists:max([0 | [length(atom_to_list(X)) || X <- L]]).

%% Pairs each table in Tabs with its storage type on Node, as reported by
%% `mnesia_lib:storage_type_at_node/2`.
tabs_and_types(Tabs, Node) ->
    [{T, mnesia_lib:storage_type_at_node(Node, T)} || T <- Tabs].


stop_app(n2o) ->
    catch roster:stop_vnodes(),
    application:stop(n2o);