diagnoseLag.m 7.31 KB
Newer Older
Claas Faber's avatar
Claas Faber committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
function [all_strengths,lag_value,cropped_ref,cropped_lag] = ...
    diagnoseLag(ref_data,ref_dv,lag_data,lag_dv,analysis_type,ref_month,maximum_lag,start_year,stop_year); 

% Perform a cross analysis between a reference and lag dataset to find the lag 
% at which the relationship between the variables is strongest, 
% according to the chosen analysis type. 
% 
% SYNTAX: 
%     
%     [all_strengths,lag_value,cropped_ref,cropped_lag] = ...
%             diagnoseLag(ref_data,ref_dv,lag_data,lag_dv,analysis_type,ref_month,maximum_lag,start_year,stop_year); 
%             
%     OUTPUT: 
%         all_strengths: Feedback strength values for all lags 
%         lag_value:     The lag value at which the relationship between >ref_data< and 
%                        >lag_data< is strongest. Assumes that the time stepping for 
%                        both datasets is the same. 
%         cropped_ref:   >ref_data<, cropped according to the >lag_value<
%         cropped_lag:   >lag_data<, cropped accoridng to the >lag_value< 
%         
%     INPUT: 
%         ref_data:      "reference" dataset; this will be kept constant. 
%         ref_dv:        Matlab date vector associated with >ref_data<. 
%         lag_data:      "lagged" dataset: This will be shifted in time incrementially, 
%                        to find the strongest (positive) relationship between the two 
%                        datasets. Note that only positive relationships are considered 
%                        here. 
%         lag_dv:        Matlab date vector associated with >lag_data<. 
%         analysis_type: Char, specifies how the "feedback" strength is diagnosed.
%                        Either of 
%                        "acc":  Anomaly correlation, not suitable for very short datasets
%                        "reg":  reguar linear regression, where >response< is regressed on >forcing<
%                        "rreg": robust lineat regression, where >response< is regressed on >forcing<
%         ref_month:     Month of >ref_data<, around which the >lag_data< will be shifted. 
%         maximum_lag:   Maximum lag for which >lag_data< will be shifted against 
%                        >ref_data<, both in positive and negative time steps. 
%         start_year:    Reference for the year in which the analysis should start 
%         stop_year:     Reference for the year in which the analysis should stop 

% ----------- % 
% CHECK INPUT % 
% ----------- % 

    % assign an ID to the analysis type
    % for use with switch 
    if strcmp(analysis_type,'acc')
        analysis_id  = 1;
    elseif strcmp(analysis_type,'reg')
        analysis_id  = 2;
    elseif strcmp(analysis_type,'rreg')
        analysis_id  = 3;
    else
        error_string = sprintf('Analysis type >%s< not known.\nChoose either of acc, reg, or rreg.',analysis_type); 
        error(error_string); 
    end
    
    % sensible month? 
    if ( ( ref_month < 1 ) | ( ref_month > 12 ) )
        error('Month should be between 1 and 12.'); 
    end 
    
% ------------ % 
% PREPARATIONS % 
% ------------ % 

    % define all lag values 
    all_lags = [-maximum_lag:1:maximum_lag]; 
    n_lags   = length(all_lags); 
    
    % prepare storage 
    all_strengths = zeros(size(all_lags)); 
    
    all_ref_indices = []; 
    all_lag_indices = []; 
    
% ---------------------- % 
% PERFORM CROSS ANALYSIS % 
% ---------------------- % 

    for c_lag = 1:n_lags
    
        lag = all_lags(c_lag);
    
        % ADAPT START AND STOP YEARS?
        % +++++++++++++++++++++++++++

            % initial state
            lag_month     = ref_month + lag;
            negative_wrap = ( lag_month <  1);
            positive_wrap = ( lag_month > 12 );
            n_wraps       = 0;

            % start and stop years for both time series
            start_ref = start_year;
            stop_ref  = stop_year;
            start_lag = start_year;
            stop_lag  = stop_year;

            % hard-wire whether to shift up or down
            % the lag time
            lag_up   = positive_wrap;
            lag_down = negative_wrap;

            % wrap around the lag month
            while ( negative_wrap | positive_wrap );

                if negative_wrap
                    lag_month = lag_month + 12;
                else
                    lag_month = lag_month - 12;
                end

                % keep track of how many times you wrapped around
                % the true month
                n_wraps = n_wraps + 1;

                % wrap finished?
                negative_wrap = ( lag_month <  1);
                positive_wrap = ( lag_month > 12 );

            end

            % match the range where both time series exist
            % lag_month > 12: lag time series needs to start later
            % lag_month < 1 : ref time series needs to start later
            if lag_up
                stop_ref  = stop_ref  - n_wraps;
                start_lag = start_lag + n_wraps;
            elseif lag_down
                start_ref = start_ref + n_wraps;
                stop_lag  = stop_lag  - n_wraps;
            end

        % SELECT CORRECT TIME
        % +++++++++++++++++++

            % reference
            is_ref_year  = ( ( ref_dv(:,1) >= start_ref ) & ...
                             ( ref_dv(:,1) <= stop_ref ) );
            is_ref_month = ( ref_dv(:,2) == ref_month );
            is_ref       = ( is_ref_year & is_ref_month );

            % lag
            is_lag_year  = ( ( lag_dv(:,1) >= start_lag ) & ...
                             ( lag_dv(:,1) <= stop_lag ) );
            is_lag_month = ( lag_dv(:,2) == lag_month );
            is_lag       = ( is_lag_year & is_lag_month );
        
        % CROP DATA AND STORE CROPPED DATA 
        % ++++++++++++++++++++++++++++++++

            % crop data 
            ref_series = ref_data(is_ref);
            lag_series = lag_data(is_lag);
            
            % compatibility check 
            % do the cropped datasets have the same size? 
            size_ref = size(ref_series); 
            size_lag = size(lag_series); 

            same_size = sum(~( size_ref == size_lag )); 

            if same_size ~= 0 
                error_string = sprintf(['Cropped datasets do not have the same size.\n',...
                    'Make sure that they cover the specified period completely.']); 
                error(error_string); 
            end 
            
            % save data 
            n_series = length(ref_series);

            all_ref_indices(c_lag,1:n_series) = ref_series;
            all_lag_indices(c_lag,1:n_series) = lag_series;
            
        
        % PERFORM FEEDBACK CALCULATION 
        % ++++++++++++++++++++++++++++
        
            all_strengths(c_lag) = calculateFeedback(ref_series,lag_series,analysis_type); 
    
    % for: loop through all lag values 
    end 
    
    % FIND PEAK RELATIONSHIP 
    % ++++++++++++++++++++++ 
    
        % replace zeros by NaNs
        all_ref_indices(all_ref_indices == 0.) = NaN;
        all_lag_indices(all_lag_indices == 0.) = NaN;
        
        % find the strongest relationship; only consider positive 
        % values for now 
        [peak_strengths,peak_index] = max(all_strengths);

        % actual lag
        lag_value = all_lags(peak_index);
        
        % cropped data 
        cropped_ref = squeeze(all_ref_indices(peak_index,:)); 
        cropped_lag = squeeze(all_lag_indices(peak_index,:));