/**
 * Input accepted by {@link getAttributionScript}.
 */
interface GetAttributionScriptParams {
  /**
   * Measurement settings. They are re-keyed to snake_case, serialized as JSON
   * and embedded in the generated Python script as `measurement_config`.
   */
  config: {
    /**
     * Lookback window for click exposures. The generated script passes this
     * value to `timedelta(days=...)`.
     */
    clickLookbackWindowDays: number;
    /**
     * Lookback window for non-click (view) exposures. Despite the missing
     * `Days` suffix, the generated script also feeds it to
     * `timedelta(days=int(...))`.
     */
    viewLookbackWindow: number;
    /**
     * Name of the attribution rule. The generated script handles
     * `"last_click"`, `"even_credit"` and `"first_last"`; any other value
     * makes the script raise a `ValueError` when it runs.
     */
    attributionRule: string;
  };
  /**
   * Publisher identifiers. For every slug the generated script reads
   * `/input/{slug}_match` and `/input/{slug}_exposure`.
   */
  publisherSlugs: string[];
}

/**
 * Builds the source code of the Python (pandas) attribution script.
 *
 * The generated script:
 * - reads `/input/{slug}_match` and `/input/{slug}_exposure` for every
 *   publisher slug, plus `/input/brand_match` and `/input/brand_conversions`;
 * - attributes conversions to exposures using the configured rule
 *   (`last_click`, `even_credit` or `first_last`; anything else raises a
 *   Python `ValueError` at script run time);
 * - writes `campaign_stats` and `product_stats` to `/output` as CSV and pickle.
 *
 * The configuration and the slugs are embedded as JSON that the script decodes
 * with `json.loads`. The JSON text is itself emitted as a double-quoted,
 * fully escaped string literal, so quotes, backslashes or newlines inside a
 * value can neither break the Python syntax nor alter the decoded data.
 *
 * @param params.config - Attribution rule and lookback windows (in days).
 * @param params.publisherSlugs - Publishers whose input tables are processed.
 * @returns The Python source code as a string.
 */
export const getAttributionScript = ({
  config,
  publisherSlugs,
}: GetAttributionScriptParams): string => {
  // Encode twice: the inner call produces the JSON payload, the outer call
  // turns that payload into a double-quoted string literal. The escapes JSON
  // uses for string literals (\" \\ \n \uXXXX ...) are also valid Python string
  // escapes, so Python reads back exactly the inner JSON text.
  const toPythonJsonLiteral = (value: unknown): string =>
    JSON.stringify(JSON.stringify(value));

  const measurementConfigLiteral = toPythonJsonLiteral({
    attribution_rule: config.attributionRule,
    click_lookback_window_days: config.clickLookbackWindowDays,
    view_lookback_window: config.viewLookbackWindow,
  });
  const publisherSlugsLiteral = toPythonJsonLiteral(publisherSlugs);

  return `
from datetime import datetime, timedelta
import decentriq_util
import json
import math
import os
import pandas as pd


# Define processing parameters
measurement_config = json.loads(${measurementConfigLiteral})
pubs = json.loads(${publisherSlugsLiteral})


# Helper function to read CSV files
def read_csv(file_path, columns):
    if os.path.exists(file_path):
        return pd.read_csv(file_path, names=columns)
    else:
        print(f"File not found: {file_path}")
        return pd.DataFrame(columns=columns)

# Read publisher data
pub_data = {
    pub: {
        'pub_match': decentriq_util.read_tabular_data(f"/input/{pub}_match"),
        'pub_exposure': decentriq_util.read_tabular_data(f"/input/{pub}_exposure").fillna(0)
    }
    for pub in pubs
}

# Read brand data
brand_match = decentriq_util.read_tabular_data("/input/brand_match")
brand_conversions = decentriq_util.read_tabular_data("/input/brand_conversions")

# Debug print
print("Publisher data and Brand data loaded")

#create_cross_walks
cross_walks = {
    pub: brand_match.merge(
        tables['pub_match'],
        left_on='matching_id',
        right_on='matching_id',
        suffixes=('_brand', '_pub'),
        how='inner'
    )[['brand_user_id','pub_user_id']]
    for pub, tables in pub_data.items()
}
matched_pub_data = {
    pub: tables['pub_exposure'].merge(
        cross_walks[pub],
        left_on='pub_user_id',
        right_on='pub_user_id',
        suffixes=('', '_crosswalk'),
        how='inner'
    ).drop(columns=['pub_user_id',])
    for pub, tables in pub_data.items()
}

# Combine publisher data
for pub, tables in matched_pub_data.items():
    tables['pub'] = pub
combined_pub_exposure = pd.concat([matched_pub_data[pub] for pub in pubs], ignore_index=True)

# Convert timestamps to datetime objects
combined_pub_exposure['timestamp'] = pd.to_datetime(combined_pub_exposure['timestamp'], unit='s')
brand_conversions['timestamp'] = pd.to_datetime(brand_conversions['timestamp'], unit='s')

# Merge brand matches with combined publisher exposures
combined_pub_exposure = combined_pub_exposure.merge(
    brand_match,
    left_on='brand_user_id',
    right_on='brand_user_id',
    suffixes=('', '_brand')
)

# Columns shared by the result of every attribution function
ATTRIBUTION_COLUMNS = ['product_category', 'conversion_value', 'attributed_campaign_id', 'pub']

def empty_attributions():
    # Empty result that still carries the columns the statistics step groups by
    return pd.DataFrame(columns=ATTRIBUTION_COLUMNS)

# Define attribution functions
def last_click_attribution(conversions, exposures, click_lookback_window_days=1, view_lookback_window_days=None):
    if conversions.empty:
        return empty_attributions()
    lookback_click_window = timedelta(days=click_lookback_window_days)
    lookback_view_window = timedelta(days=int(-1 if view_lookback_window_days is None else view_lookback_window_days))
    exposures = exposures.sort_values('timestamp')  
    def find_last_click(conversion):
        lookback_click_time = conversion['timestamp'] - lookback_click_window
        lookback_view_time = conversion['timestamp'] - lookback_view_window
        relevant_exposures = exposures[
            (exposures['brand_user_id'] == conversion['brand_user_id']) & 
            (exposures['timestamp'] <= conversion['timestamp']) & 
            (
                ((exposures['timestamp'] > lookback_click_time) & (exposures['click_flag'] == 1)) |
                ((exposures['timestamp'] > lookback_view_time) & (exposures['click_flag'] != 1))
            )
        ]
        if not relevant_exposures.empty:
            last_click = relevant_exposures[relevant_exposures['click_flag'] == 1]
            if not last_click.empty:
                last_exposure = last_click.iloc[-1]
            else:
                last_exposure = relevant_exposures.iloc[-1]
            return pd.Series({
                'product_category': conversion['product_category'],
                'conversion_value': conversion['value'],
                'attributed_campaign_id': last_exposure['campaign_id'],
                'pub': last_exposure['pub']
            })
        else:
            return pd.Series({
                'product_category': conversion['product_category'],
                'conversion_value': conversion['value'],
                'attributed_campaign_id': None,
                'pub': None
            })

    return conversions.apply(find_last_click, axis=1)

def even_credit_attribution(conversions, exposures, click_lookback_window_days=1, view_lookback_window_days=None):
    if conversions.empty:
        return empty_attributions()
    lookback_click_window = timedelta(days=click_lookback_window_days)
    lookback_view_window = timedelta(days=int(-1 if view_lookback_window_days is None else view_lookback_window_days)) # set to -1 if none, this makes it impossible to match
    exposures = exposures.sort_values('timestamp')
    
    def assign_even_credit(conversion):
        lookback_click_time = conversion['timestamp'] - lookback_click_window
        lookback_view_time = conversion['timestamp'] - lookback_view_window
        relevant_exposures = exposures[
            (exposures['brand_user_id'] == conversion['brand_user_id']) & 
            (exposures['timestamp'] <= conversion['timestamp']) & 
            (
                ((exposures['timestamp'] > lookback_click_time) & (exposures['click_flag'] == 1)) |
                ((exposures['timestamp'] > lookback_view_time) & (exposures['click_flag'] != 1))
            )
        ]
        num_exposures = len(relevant_exposures)
        if num_exposures > 0:
            even_value = conversion['value'] / num_exposures
            return pd.DataFrame({
                'product_category': [conversion['product_category']] * num_exposures,
                'conversion_value': [even_value] * num_exposures,
                'attributed_campaign_id': relevant_exposures['campaign_id'],
                'pub': relevant_exposures['pub']
            })
        else:
            return pd.DataFrame({
                'product_category': [conversion['product_category']],
                'conversion_value': [conversion['value']],
                'attributed_campaign_id': [None],
                'pub': [None]
            })

    # Use \`apply\` and \`concat\` to flatten the DataFrame
    attribution_list = conversions.apply(assign_even_credit, axis=1).tolist()
    attributions = pd.concat(attribution_list, ignore_index=True)
    
    return attributions

def first_last_attribution(conversions, exposures, click_lookback_window_days=1, view_lookback_window_days=None):
    if conversions.empty:
        return empty_attributions()
    lookback_click_window = timedelta(days=click_lookback_window_days)
    lookback_view_window = timedelta(days=int(-1 if view_lookback_window_days is None else view_lookback_window_days))
    exposures = exposures.sort_values('timestamp')
    
    def find_first_and_last_touch(conversion):
        lookback_click_time = conversion['timestamp'] - lookback_click_window
        lookback_view_time = conversion['timestamp'] - lookback_view_window
        relevant_exposures = exposures[
            (exposures['brand_user_id'] == conversion['brand_user_id']) & 
            (exposures['timestamp'] <= conversion['timestamp']) & 
            (
                ((exposures['timestamp'] > lookback_click_time) & (exposures['click_flag'] == 1)) |
                ((exposures['timestamp'] > lookback_view_time) & (exposures['click_flag'] != 1))
            )
        ]
        
        if not relevant_exposures.empty:
            first_exposure = relevant_exposures.iloc[0]
            last_exposure = relevant_exposures.iloc[-1]
            first_touch_value = conversion['value'] / 2
            last_touch_value = conversion['value'] / 2
            
            first_touch = pd.Series({
                'pub': first_exposure['pub'],
                'product_category': conversion['product_category'],
                'attributed_campaign_id': first_exposure['campaign_id'],
                'conversion_value': first_touch_value
            })
            
            last_touch = pd.Series({
                'pub': last_exposure['pub'],
                'product_category': conversion['product_category'],
                'attributed_campaign_id': last_exposure['campaign_id'],
                'conversion_value': last_touch_value
            })
            
            return pd.DataFrame([first_touch, last_touch])
        
        else:  # no exposure in the lookback windows: contribute no rows, but keep the shared schema
            return empty_attributions()
        
    all_attributions = pd.concat(conversions.apply(find_first_and_last_touch, axis=1).values)
    return all_attributions

# Perform Attribution
attribution_rule = measurement_config["attribution_rule"]
click_lookback_window_days = measurement_config["click_lookback_window_days"]
view_lookback_window_days = measurement_config["view_lookback_window"]
if attribution_rule == "last_click":
    attributions = last_click_attribution(brand_conversions, combined_pub_exposure, click_lookback_window_days=click_lookback_window_days, view_lookback_window_days=view_lookback_window_days)
elif attribution_rule == "even_credit":
    attributions = even_credit_attribution(brand_conversions, combined_pub_exposure, click_lookback_window_days=click_lookback_window_days, view_lookback_window_days=view_lookback_window_days)
elif attribution_rule == "first_last":
    attributions = first_last_attribution(brand_conversions, combined_pub_exposure, click_lookback_window_days=click_lookback_window_days, view_lookback_window_days=view_lookback_window_days)
else:
    raise ValueError(f'Unknown attribution_rule: {attribution_rule}')

# Debug print
print("Attributions calculated")

def get_campaign_statistics(attributions, exposures):
    
    # Calculate number of impressions and clicks
    impressions_clicks = exposures.groupby(['pub', 'campaign_id']).agg(
        impressions=('timestamp', 'count'),
        clicks=('click_flag', 'sum')
    ).reset_index()
    
    # Calculate number of conversions and total attributed value
    conversions_value = attributions.groupby(['pub', 'attributed_campaign_id']).agg(
        conversions=('conversion_value', 'count'),
        total_value=('conversion_value', 'sum')
    ).reset_index()
    
    # Combine the statistics
    campaign_stats = impressions_clicks.merge(
        conversions_value,
        left_on=['pub', 'campaign_id'],
        right_on=['pub', 'attributed_campaign_id'],
        how='outer'
    ).fillna(0)
    
    # Rename columns for clarity
    campaign_stats = campaign_stats.rename(columns={
        'pub': 'publisher',
        'campaign_id': 'campaign',
        'impressions': 'num_impressions',
        'clicks': 'num_clicks',
        'conversions': 'num_conversions',
        'total_value': 'total_value_attributed'
    })
    
    return campaign_stats

# Calculate the statistics
campaign_stats = get_campaign_statistics(attributions, combined_pub_exposure)

def get_product_statistics(attributions):
    product_stats = attributions.groupby(['product_category']).agg(
            conversions=('conversion_value', 'count'),
            total_value=('conversion_value', 'sum')
        ).reset_index()
    product_stats = product_stats.rename(columns={
            'conversions': 'num_conversions',
            'total_value': 'total_value_attributed'
        })
    return product_stats

product_stats = get_product_statistics(attributions)
# Debug print
print("Statistics calculated")

# Write statistics to CSV
campaign_stats.to_csv('/output/campaign_stats.csv', index=False)
campaign_stats.to_pickle('/output/campaign_stats.pkl')
product_stats.to_csv('/output/product_stats.csv', index=False)
product_stats.to_pickle('/output/product_stats.pkl')

print("Statistics calculation completed. Results saved to 'campaign_stats.csv' and 'product_stats.csv'.")

`;
};
